未验证 提交 ddf57836 编写于 作者: J jiaozhenyu 提交者: GitHub

Merge pull request #9812 from jshower/develop

Changing network configuration, avoid nan 
...@@ -37,7 +37,7 @@ depth = 8 ...@@ -37,7 +37,7 @@ depth = 8
mix_hidden_lr = 1e-3 mix_hidden_lr = 1e-3
IS_SPARSE = True IS_SPARSE = True
PASS_NUM = 10 PASS_NUM = 100
BATCH_SIZE = 10 BATCH_SIZE = 10
embedding_name = 'emb' embedding_name = 'emb'
...@@ -77,7 +77,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, ...@@ -77,7 +77,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
emb_layers.append(mark_embedding) emb_layers.append(mark_embedding)
hidden_0_layers = [ hidden_0_layers = [
fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers fluid.layers.fc(input=emb, size=hidden_dim, act='tanh')
for emb in emb_layers
] ]
hidden_0 = fluid.layers.sums(input=hidden_0_layers) hidden_0 = fluid.layers.sums(input=hidden_0_layers)
...@@ -94,8 +95,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, ...@@ -94,8 +95,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
for i in range(1, depth): for i in range(1, depth):
mix_hidden = fluid.layers.sums(input=[ mix_hidden = fluid.layers.sums(input=[
fluid.layers.fc(input=input_tmp[0], size=hidden_dim), fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'),
fluid.layers.fc(input=input_tmp[1], size=hidden_dim) fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh')
]) ])
lstm = fluid.layers.dynamic_lstm( lstm = fluid.layers.dynamic_lstm(
...@@ -109,8 +110,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, ...@@ -109,8 +110,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
input_tmp = [mix_hidden, lstm] input_tmp = [mix_hidden, lstm]
feature_out = fluid.layers.sums(input=[ feature_out = fluid.layers.sums(input=[
fluid.layers.fc(input=input_tmp[0], size=label_dict_len), fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act='tanh'),
fluid.layers.fc(input=input_tmp[1], size=label_dict_len) fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act='tanh')
]) ])
return feature_out return feature_out
...@@ -171,7 +172,7 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -171,7 +172,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
# check other optimizers and check why out will be NAN # check other optimizers and check why out will be NAN
sgd_optimizer = fluid.optimizer.SGD( sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay( learning_rate=fluid.layers.exponential_decay(
learning_rate=0.0001, learning_rate=0.01,
decay_steps=100000, decay_steps=100000,
decay_rate=0.5, decay_rate=0.5,
staircase=True)) staircase=True))
...@@ -233,7 +234,7 @@ def train(use_cuda, save_dirname=None, is_local=True): ...@@ -233,7 +234,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
print("second per batch: " + str((time.time( print("second per batch: " + str((time.time(
) - start_time) / batch_id)) ) - start_time) / batch_id))
# Set the threshold low to speed up the CI test # Set the threshold low to speed up the CI test
if float(pass_precision) > 0.05: if float(pass_precision) > 0.01:
if save_dirname is not None: if save_dirname is not None:
# TODO(liuyiqun): Change the target to crf_decode # TODO(liuyiqun): Change the target to crf_decode
fluid.io.save_inference_model(save_dirname, [ fluid.io.save_inference_model(save_dirname, [
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册