Unverified commit 55737d59, authored by Yibing Liu, committed by GitHub

Merge pull request #1446 from kuke/dam_fixs

Fix test & feed type and disable grad clip temporarily
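Note on the feed-type fix: in this version of Fluid, the lookup-table op behind fluid.layers.embedding accepts only int64 ids, so declaring the word-id inputs as int32 makes the runtime feed-type check fail. A minimal sketch of the constraint — the "response" name comes from the diff below, while the 50/1000/200 sizes and batch of 8 are made up for illustration:

    import numpy as np
    import paddle.fluid as fluid

    # id inputs must be int64 to match the embedding lookup table
    ids = fluid.layers.data(name="response", shape=[50, 1], dtype="int64")
    emb = fluid.layers.embedding(input=ids, size=[1000, 200])

    # the numpy array fed for "response" must use the same dtype
    feed = {"response": np.zeros((8, 50, 1), dtype="int64")}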
@@ -25,7 +25,7 @@ class Net(object):
         # turns ids
         shapes = [[-1, self._max_turn_len, 1]
                   for i in six.moves.xrange(self._max_turn_num)]
-        dtypes = ["int32" for i in six.moves.xrange(self._max_turn_num)]
+        dtypes = ["int64" for i in six.moves.xrange(self._max_turn_num)]
         # turns mask
         shapes += [[-1, self._max_turn_len, 1]
                    for i in six.moves.xrange(self._max_turn_num)]
@@ -34,7 +34,7 @@ class Net(object):
         # response ids, response mask, label
         shapes += [[-1, self._max_turn_len, 1], [-1, self._max_turn_len, 1],
                    [-1, 1]]
-        dtypes += ["int32", "float32", "float32"]
+        dtypes += ["int64", "float32", "float32"]
         py_reader = fluid.layers.py_reader(
             capacity=capacity,
@@ -60,7 +60,7 @@ class Net(object):
         for i in six.moves.xrange(self._max_turn_num):
             name = "turn_%d" % i
             turn = fluid.layers.data(
-                name=name, shape=[self._max_turn_len, 1], dtype="int32")
+                name=name, shape=[self._max_turn_len, 1], dtype="int64")
             self.turns_data.append(turn)
             self._feed_names.append(name)
@@ -73,7 +73,7 @@ class Net(object):
             self._feed_names.append(name)
         self.response = fluid.layers.data(
-            name="response", shape=[self._max_turn_len, 1], dtype="int32")
+            name="response", shape=[self._max_turn_len, 1], dtype="int64")
         self.response_mask = fluid.layers.data(
             name="response_mask",
             shape=[self._max_turn_len, 1],
@@ -141,7 +141,7 @@ class Net(object):
                 mask_cache=mask_cache)
             Hu_stack.append(Hu)
         # cross attention
         r_a_t_stack = []
         t_a_r_stack = []
         for index in six.moves.xrange(self._stack_num + 1):
@@ -183,7 +183,7 @@ class Net(object):
             t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
             r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)
             # sim shape: [batch_size, 2*(stack_num+1), max_turn_len, max_turn_len]
             sim = fluid.layers.matmul(
                 x=t_a_r, y=r_a_t, transpose_y=True, alpha=1 / np.sqrt(200.0))
             sim_turns.append(sim)
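The same dtype lists are handed to fluid.layers.py_reader, so the reader slots and the plain fluid.layers.data layers above have to agree on int64 for every id slot. A self-contained sketch of that wiring — the turn count, lengths, capacity, and the "demo_reader" name are dummies, not the model's real configuration:

    import paddle.fluid as fluid

    max_turn_num, max_turn_len = 2, 5
    # ids and masks for each turn, then response ids, response mask, label
    shapes = [[-1, max_turn_len, 1]] * max_turn_num
    shapes += [[-1, max_turn_len, 1]] * max_turn_num
    shapes += [[-1, max_turn_len, 1], [-1, max_turn_len, 1], [-1, 1]]
    dtypes = ["int64"] * max_turn_num          # turn ids: int64, not int32
    dtypes += ["float32"] * max_turn_num       # turn masks
    dtypes += ["int64", "float32", "float32"]  # response ids, mask, label

    reader = fluid.layers.py_reader(
        capacity=32, shapes=shapes, dtypes=dtypes, name="demo_reader")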
...
@@ -126,6 +126,7 @@ def test(args):
     dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
               args.emb_size, args.stack_num, args.channel1_num,
               args.channel2_num)
+    dam.create_data_layers()
     loss, logits = dam.create_network()
     loss.persistable = True
@@ -144,7 +145,7 @@ def test(args):
             staircase=True))
     optimizer.minimize(loss)
+    # The fetched loss is wrong when mem opt is enabled
     fluid.memory_optimize(fluid.default_main_program())
     if args.use_cuda:
@@ -191,7 +192,8 @@ def test(args):
         feed_list = []
         for dev in six.moves.xrange(dev_count):
             index = it * dev_count + dev
-            feed_dict = reader.make_one_batch_input(test_batches, index)
+            batch_data = reader.make_one_batch_input(test_batches, index)
+            feed_dict = dict(zip(dam.get_feed_names(), batch_data))
             feed_list.append(feed_dict)
         predicts = test_exe.run(feed=feed_list, fetch_list=[logits.name])
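The test fix relies on reader.make_one_batch_input returning the per-input arrays in the same order in which create_data_layers registered the feed names, so zipping the two lists rebuilds the name-to-array dict the executor expects. A small illustration of the pattern — the names mirror those in the diff, while the arrays and their shapes are dummies:

    import numpy as np

    feed_names = ["turn_0", "turn_1", "response"]   # as from dam.get_feed_names()
    batch_data = [np.zeros((8, 5, 1), dtype="int64") for _ in feed_names]

    feed_dict = dict(zip(feed_names, batch_data))   # {"turn_0": array, ...}
    assert sorted(feed_dict) == sorted(feed_names)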
...
@@ -203,8 +203,8 @@ def train(args):
     loss.persistable = True
     logits.persistable = True
     # gradient clipping
-    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
-        max=1.0, min=-1.0))
+    #fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
+    #    max=1.0, min=-1.0))
     optimizer = fluid.optimizer.Adam(
         learning_rate=fluid.layers.exponential_decay(
@@ -324,7 +324,7 @@ def train(args):
             evaluate(score_path, result_file_path)
         return step
     # train on one epoch with pyreader
     def train_with_pyreader(step):
         def data_provider():
             for index in six.moves.xrange(batch_num):
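fluid.clip.GradientClipByValue clamps every gradient element into [min, max] before the optimizer step; the commit comments the call out rather than deleting it, which fits the "temporarily" in the title. What the disabled clip would do, sketched with numpy as a stand-in for the fluid op (the gradient values are made up):

    import numpy as np

    grad = np.array([-3.0, -0.5, 0.2, 2.5])
    clipped = np.clip(grad, -1.0, 1.0)   # -> [-1.0, -0.5, 0.2, 1.0]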
...