Commit d9cb9684 authored by Yibing Liu

Refactor model config script

Parent c9e35e62
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.v2 as paddle
import paddle.v2.fluid as fluid


def stacked_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num=1749,
                        is_train=True):
    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)

    seq_conv1 = fluid.layers.sequence_conv(
        input=feature,
        num_filters=1024,
        filter_size=3,
        filter_stride=1,
        bias_attr=True)
    bn1 = fluid.layers.batch_norm(
        input=seq_conv1,
        act="sigmoid",
        is_test=not is_train,
        momentum=0.9,
        epsilon=1e-05,
        data_layout='NCHW')

    stack_input = bn1
    for i in range(stacked_num):
        fc = fluid.layers.fc(input=stack_input,
                             size=hidden_dim * 4,
                             bias_attr=True)
        proj, cell = fluid.layers.dynamic_lstmp(
            input=fc,
            size=hidden_dim * 4,
            proj_size=proj_dim,
            bias_attr=True,
            use_peepholes=True,
            is_reverse=False,
            cell_activation="tanh",
            proj_activation="tanh")
        bn = fluid.layers.batch_norm(
            input=proj,
            act="sigmoid",
            is_test=not is_train,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')
        stack_input = bn

    prediction = fluid.layers.fc(input=stack_input,
                                 size=class_num,
                                 act='softmax')

    if not is_train: return feature, prediction

    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    return prediction, label, avg_cost
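
For reference, the snippet below is a minimal usage sketch and not part of the commit; the dimension values (hidden_dim=1024, proj_dim=512, stacked_num=5) and the learning rate are illustrative assumptions, not the repository's defaults.

import paddle.v2.fluid as fluid

from model import stacked_lstmp_model

# Build the training graph: the builder returns the prediction layer, the
# label placeholder and the averaged cross-entropy cost to minimize.
prediction, label, avg_cost = stacked_lstmp_model(
    hidden_dim=1024, proj_dim=512, stacked_num=5, is_train=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.002)
optimizer.minimize(avg_cost)

# With is_train=False the same builder instead returns (feature, prediction)
# and switches every batch_norm layer to inference mode (is_test=True).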
@@ -2,6 +2,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import argparse
import time
@@ -13,6 +14,8 @@ import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.trans_add_delta as trans_add_delta
import data_utils.trans_splice as trans_splice
import data_utils.data_reader as reader
from model import stacked_lstmp_model
from utils import print_arguments, lodtensor_to_ndarray
def parse_args():
@@ -42,6 +45,11 @@ def parse_args():
        type=int,
        default=100,
        help='Epoch number to train. (default: %(default)d)')
    parser.add_argument(
        '--print_per_batches',
        type=int,
        default=100,
        help='Interval to print training accuracy. (default: %(default)d)')
    parser.add_argument(
        '--learning_rate',
        type=float,
@@ -54,94 +62,28 @@ def parse_args():
        choices=['CPU', 'GPU'],
        help='The device type. (default: %(default)s)')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
        '--mean_var',
        type=str,
        default='data/global_mean_var_search26kHr',
        help='mean var path')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
        '--feature_lst',
        type=str,
        default='data/feature.lst',
        help='feature list path.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument('--mean_var', type=str, help='mean var path')
    parser.add_argument('--feature_lst', type=str, help='mean var path')
    parser.add_argument('--label_lst', type=str, help='mean var path')
        '--label_lst',
        type=str,
        default='data/label.lst',
        help='label list path.')
    args = parser.parse_args()
    return args
def print_arguments(args):
    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
                                vars(args)['device'] == 'GPU')
    print('----------- Configuration Arguments -----------')
    for arg, value in sorted(vars(args).iteritems()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def dynamic_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num=1749,
                        is_train=True):
    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)

    seq_conv1 = fluid.layers.sequence_conv(
        input=feature,
        num_filters=1024,
        filter_size=3,
        filter_stride=1,
        bias_attr=True)
    bn1 = fluid.layers.batch_norm(
        input=seq_conv1,
        act="sigmoid",
        is_test=False,
        momentum=0.9,
        epsilon=1e-05,
        data_layout='NCHW')

    stack_input = bn1
    for i in range(stacked_num):
        fc = fluid.layers.fc(input=stack_input,
                             size=hidden_dim * 4,
                             bias_attr=True)
        proj, cell = fluid.layers.dynamic_lstmp(
            input=fc,
            size=hidden_dim * 4,
            proj_size=proj_dim,
            bias_attr=True,
            use_peepholes=True,
            is_reverse=False,
            cell_activation="tanh",
            proj_activation="tanh")
        bn = fluid.layers.batch_norm(
            input=proj,
            act="sigmoid",
            is_test=False,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')
        stack_input = bn

    prediction = fluid.layers.fc(input=stack_input,
                                 size=class_num,
                                 act='softmax')

    if not is_train: return feature, prediction

    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    return prediction, label, avg_cost


def train(args):
    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    """train in loop."""
    prediction, label, avg_cost = dynamic_lstmp_model(
    prediction, label, avg_cost = stacked_lstmp_model(
        args.hidden_dim, args.proj_dim, args.stacked_num)

    adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
@@ -173,7 +115,6 @@ def train(args):
    res_label = fluid.LoDTensor()
    for pass_id in xrange(args.pass_num):
        pass_start_time = time.time()
        words_seen = 0
        accuracy.reset(exe)
        batch_id = 0
        while True:
@@ -188,40 +129,28 @@
            res_label.set_lod([lod])
            batch_id += 1
            words_seen += lod[-1]
            loss, acc = exe.run(
                fluid.default_main_program(),
                feed={"feature": res_feature,
                      "label": res_label},
                fetch_list=[avg_cost] + accuracy.metrics,
                return_numpy=False)
            train_acc = accuracy.eval(exe)
            print("acc:", lodtensor_to_ndarray(loss))
            _, acc = exe.run(fluid.default_main_program(),
                             feed={"feature": res_feature,
                                   "label": res_label},
                             fetch_list=[avg_cost] + accuracy.metrics,
                             return_numpy=False)

            if batch_id > 0 and (batch_id % args.print_per_batches == 0):
                print("\nBatch %d, training acc: %f" %
                      (batch_id, lodtensor_to_ndarray(acc)[0]))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()

        pass_end_time = time.time()
        time_consumed = pass_end_time - pass_start_time
        words_per_sec = words_seen / time_consumed


def lodtensor_to_ndarray(lod_tensor):
    dims = lod_tensor.get_dims()
    ret = np.zeros(shape=dims).astype('float32')
    for i in xrange(np.product(dims)):
        ret.ravel()[i] = lod_tensor.get_float_element(i)
    return ret, lod_tensor.lod()

        # need to add test logic (kuke)
        print("\nPass %d, time: %fs, test accuracy: 0.0f\n" %
              (pass_id, time_consumed))
if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
    if args.infer_only:
        pass
    else:
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(args)
        else:
            train(args)
    train(args)
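
For completeness, a small sketch (not part of the commit) of driving the simplified entry point programmatically; it assumes the script above is importable as train (i.e. saved as train.py), that the default data/ lists exist, and that no extra command-line arguments are present, since parse_args() reads sys.argv.

from train import parse_args, train  # assumes the training script is train.py

# parse_args() now falls back to the new defaults (data/global_mean_var_search26kHr,
# data/feature.lst, data/label.lst); these files must exist for training to start.
args = parse_args()
train(args)  # the __main__ block above now does exactly this, unconditionally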
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import argparse


def print_arguments(args):
    print('----------- Configuration Arguments -----------')
    for arg, value in sorted(vars(args).iteritems()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def lodtensor_to_ndarray(lod_tensor):
    dims = lod_tensor.get_dims()
    ret = np.zeros(shape=dims).astype('float32')
    for i in xrange(np.product(dims)):
        ret.ravel()[i] = lod_tensor.get_float_element(i)
    return ret, lod_tensor.lod()
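
A minimal usage sketch of the two helpers (illustrative only, not part of the commit; the single --learning_rate flag and the one-element LoDTensor are made-up inputs):

import argparse

import numpy as np
import paddle.v2.fluid as fluid

from utils import print_arguments, lodtensor_to_ndarray

# Dump a parsed argparse namespace, the way train.py does right after parse_args().
parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float, default=0.002)
print_arguments(parser.parse_args([]))

# Copy a LoDTensor element by element into a numpy ndarray and fetch its LoD;
# train.py indexes the returned array to read scalar metrics such as accuracy.
tensor = fluid.LoDTensor()
tensor.set(np.array([[0.25]], dtype='float32'), fluid.CPUPlace())
value, lod = lodtensor_to_ndarray(tensor)
print(value)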