未验证 提交 c19f7ac1 编写于 作者: H Hongyu Liu 提交者: GitHub

Merge pull request #1493 from phlrain/add_cudnn_lm

Add a cuDNN LSTM option to the language model (new --rnn_model flag: static|padding|cudnn)
...@@ -7,5 +7,6 @@ python train.py \ ...@@ -7,5 +7,6 @@ python train.py \
--data_path data/simple-examples/data/ \ --data_path data/simple-examples/data/ \
--model_type small \ --model_type small \
--use_gpu True \ --use_gpu True \
--rnn_model static \
--enable_ce | python _ce.py --enable_ce | python _ce.py
...@@ -26,7 +26,12 @@ def parse_args(): ...@@ -26,7 +26,12 @@ def parse_args():
"--model_type", "--model_type",
type=str, type=str,
default="small", default="small",
help="model_type [test|small|med|big]") help="model_type [test|small|medium|large]")
parser.add_argument(
"--rnn_model",
type=str,
default="static",
help="model_type [static|padding|cudnn]")
parser.add_argument( parser.add_argument(
"--data_path", type=str, help="all the data for train,valid,test") "--data_path", type=str, help="all the data for train,valid,test")
parser.add_argument('--para_init', action='store_true') parser.add_argument('--para_init', action='store_true')
......
...@@ -28,7 +28,8 @@ def lm_model(hidden_size, ...@@ -28,7 +28,8 @@ def lm_model(hidden_size,
num_layers=2, num_layers=2,
num_steps=20, num_steps=20,
init_scale=0.1, init_scale=0.1,
dropout=None): dropout=None,
rnn_model='static'):
def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
weight_1_arr = [] weight_1_arr = []
weight_2_arr = [] weight_2_arr = []
...@@ -243,7 +244,7 @@ def lm_model(hidden_size, ...@@ -243,7 +244,7 @@ def lm_model(hidden_size,
input=x, input=x,
size=[vocab_size, hidden_size], size=[vocab_size, hidden_size],
dtype='float32', dtype='float32',
is_sparse=True, is_sparse=False,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name='embedding_para', name='embedding_para',
initializer=fluid.initializer.UniformInitializer( initializer=fluid.initializer.UniformInitializer(
...@@ -256,8 +257,21 @@ def lm_model(hidden_size, ...@@ -256,8 +257,21 @@ def lm_model(hidden_size,
dropout_prob=dropout, dropout_prob=dropout,
dropout_implementation='upscale_in_train') dropout_implementation='upscale_in_train')
if rnn_model == "padding":
rnn_out, last_hidden, last_cell = padding_rnn( rnn_out, last_hidden, last_cell = padding_rnn(
x_emb, len=num_steps, init_hidden=init_hidden, init_cell=init_cell) x_emb, len=num_steps, init_hidden=init_hidden, init_cell=init_cell)
elif rnn_model == "static":
rnn_out, last_hidden, last_cell = encoder_static(
x_emb, len=num_steps, init_hidden=init_hidden, init_cell=init_cell)
elif rnn_model == "cudnn":
x_emb = layers.transpose( x_emb, perm=[1, 0, 2])
rnn_out, last_hidden, last_cell = layers.lstm( x_emb, init_hidden, init_cell, num_steps, hidden_size, num_layers, \
is_bidirec=False, \
default_initializer=fluid.initializer.UniformInitializer(low=-init_scale, high=init_scale) )
rnn_out = layers.transpose( rnn_out, perm=[1, 0, 2])
else:
print( "type not support")
return
rnn_out = layers.reshape(rnn_out, shape=[-1, num_steps, hidden_size]) rnn_out = layers.reshape(rnn_out, shape=[-1, num_steps, hidden_size])
......
...@@ -77,6 +77,7 @@ def save_para_npz(train_prog, train_exe): ...@@ -77,6 +77,7 @@ def save_para_npz(train_prog, train_exe):
def train(): def train():
args = parse_args() args = parse_args()
model_type = args.model_type model_type = args.model_type
rnn_model = args.rnn_model
logger = logging.getLogger("lm") logger = logging.getLogger("lm")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
formatter = logging.Formatter( formatter = logging.Formatter(
...@@ -157,7 +158,8 @@ def train(): ...@@ -157,7 +158,8 @@ def train():
num_layers=num_layers, num_layers=num_layers,
num_steps=num_steps, num_steps=num_steps,
init_scale=init_scale, init_scale=init_scale,
dropout=dropout) dropout=dropout,
rnn_model=rnn_model)
# clone from default main program and use it as the validation program # clone from default main program and use it as the validation program
main_program = fluid.default_main_program() main_program = fluid.default_main_program()
inference_program = fluid.default_main_program().clone(for_test=True) inference_program = fluid.default_main_program().clone(for_test=True)
...@@ -206,18 +208,19 @@ def train(): ...@@ -206,18 +208,19 @@ def train():
def eval(data): def eval(data):
# when eval the batch_size set to 1 # when eval the batch_size set to 1
eval_data_iter = reader.get_data_iter(data, 1, num_steps) eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
total_loss = 0.0 total_loss = 0.0
iters = 0 iters = 0
init_hidden = np.zeros((num_layers, 1, hidden_size), dtype='float32') init_hidden = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')
init_cell = np.zeros((num_layers, 1, hidden_size), dtype='float32') init_cell = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')
for batch_id, batch in enumerate(eval_data_iter): for batch_id, batch in enumerate(eval_data_iter):
input_data_feed = prepare_input( input_data_feed = prepare_input(
batch, init_hidden, init_cell, epoch_id, with_lr=False) batch, init_hidden, init_cell, epoch_id, with_lr=False)
fetch_outs = exe.run( fetch_outs = exe.run(
inference_program, inference_program,
feed=input_data_feed, feed=input_data_feed,
fetch_list=[loss.name, last_hidden.name, last_cell.name]) fetch_list=[loss.name, last_hidden.name, last_cell.name],
use_program_cache=True)
cost_train = np.array(fetch_outs[0]) cost_train = np.array(fetch_outs[0])
init_hidden = np.array(fetch_outs[1]) init_hidden = np.array(fetch_outs[1])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册