提交 d9cb9684 编写于 作者: Y Yibing Liu

Refactor model config script

上级 c9e35e62
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def stacked_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num=1749,
                        is_train=True):
    """Assemble a stacked LSTMP classifier in the default Fluid program.

    The network is: sequence convolution -> batch norm -> ``stacked_num``
    repetitions of (fc -> dynamic_lstmp -> batch norm) -> softmax fc.

    Args:
        hidden_dim: LSTM hidden size; the gate projection and the
            ``dynamic_lstmp`` ``size`` are both ``hidden_dim * 4``.
        proj_dim: Projection size passed to ``dynamic_lstmp``.
        stacked_num: Number of (fc, lstmp, batch norm) groups to stack.
        class_num: Output size of the final softmax layer.
        is_train: When False, batch norm layers are built with
            ``is_test=True`` and no label/loss layers are created.

    Returns:
        ``(feature, prediction)`` when ``is_train`` is False, otherwise
        ``(prediction, label, avg_cost)``.
    """
    inference_mode = not is_train
    gate_width = hidden_dim * 4

    def sigmoid_batch_norm(layer_in):
        # Every batch norm in this network shares the same settings.
        return fluid.layers.batch_norm(
            input=layer_in,
            act="sigmoid",
            is_test=inference_mode,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')

    # NOTE(review): 120 * 11 is presumably a spliced feature width -- confirm
    # against the data reader.
    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)

    conv_out = fluid.layers.sequence_conv(
        input=feature,
        num_filters=1024,
        filter_size=3,
        filter_stride=1,
        bias_attr=True)
    hidden = sigmoid_batch_norm(conv_out)

    for _ in range(stacked_num):
        gates = fluid.layers.fc(
            input=hidden, size=gate_width, bias_attr=True)
        # The cell state output is not used further down the stack.
        projection, _cell = fluid.layers.dynamic_lstmp(
            input=gates,
            size=gate_width,
            proj_size=proj_dim,
            bias_attr=True,
            use_peepholes=True,
            is_reverse=False,
            cell_activation="tanh",
            proj_activation="tanh")
        hidden = sigmoid_batch_norm(projection)

    prediction = fluid.layers.fc(
        input=hidden, size=class_num, act='softmax')

    if inference_mode:
        return feature, prediction

    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
    avg_cost = fluid.layers.mean(
        x=fluid.layers.cross_entropy(input=prediction, label=label))
    return prediction, label, avg_cost
...@@ -2,6 +2,7 @@ from __future__ import absolute_import ...@@ -2,6 +2,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import sys
import numpy as np import numpy as np
import argparse import argparse
import time import time
...@@ -13,6 +14,8 @@ import data_utils.trans_mean_variance_norm as trans_mean_variance_norm ...@@ -13,6 +14,8 @@ import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.trans_add_delta as trans_add_delta import data_utils.trans_add_delta as trans_add_delta
import data_utils.trans_splice as trans_splice import data_utils.trans_splice as trans_splice
import data_utils.data_reader as reader import data_utils.data_reader as reader
from model import stacked_lstmp_model
from utils import print_arguments, lodtensor_to_ndarray
def parse_args(): def parse_args():
...@@ -42,6 +45,11 @@ def parse_args(): ...@@ -42,6 +45,11 @@ def parse_args():
type=int, type=int,
default=100, default=100,
help='Epoch number to train. (default: %(default)d)') help='Epoch number to train. (default: %(default)d)')
parser.add_argument(
'--print_per_batches',
type=int,
default=100,
help='Interval to print training accuracy. (default: %(default)d)')
parser.add_argument( parser.add_argument(
'--learning_rate', '--learning_rate',
type=float, type=float,
...@@ -54,94 +62,28 @@ def parse_args(): ...@@ -54,94 +62,28 @@ def parse_args():
choices=['CPU', 'GPU'], choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)') help='The device type. (default: %(default)s)')
parser.add_argument( parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.') '--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument( parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.') '--feature_lst',
type=str,
default='data/feature.lst',
help='feature list path.')
parser.add_argument( parser.add_argument(
'--use_nvprof', '--label_lst',
action='store_true', type=str,
help='If set, use nvprof for CUDA.') default='data/label.lst',
parser.add_argument('--mean_var', type=str, help='mean var path') help='label list path.')
parser.add_argument('--feature_lst', type=str, help='mean var path')
parser.add_argument('--label_lst', type=str, help='mean var path')
args = parser.parse_args() args = parser.parse_args()
return args return args
def print_arguments(args):
    """Normalize ``use_nvprof`` and print all arguments, sorted by name.

    ``use_nvprof`` is overwritten in place so that it is only truthy when
    ``device`` is ``'GPU'``.

    Args:
        args: An object exposing ``use_nvprof`` and ``device`` attributes
            through ``vars()``, e.g. an ``argparse`` namespace.
    """
    settings = vars(args)
    settings['use_nvprof'] = (settings['use_nvprof'] and
                              settings['device'] == 'GPU')
    print('----------- Configuration Arguments -----------')
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(settings.items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
def dynamic_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num=1749,
                        is_train=True):
    """Build a stacked LSTMP classifier in the default Fluid program.

    The network is: sequence convolution -> batch norm -> ``stacked_num``
    repetitions of (fc -> dynamic_lstmp -> batch norm) -> softmax fc.

    Args:
        hidden_dim: LSTM hidden size; the gate projection and the
            ``dynamic_lstmp`` ``size`` are both ``hidden_dim * 4``.
        proj_dim: Projection size passed to ``dynamic_lstmp``.
        stacked_num: Number of (fc, lstmp, batch norm) groups to stack.
        class_num: Output size of the final softmax layer.
        is_train: When False, batch norm layers are built in test mode and
            no label/loss layers are created.

    Returns:
        ``(feature, prediction)`` when ``is_train`` is False, otherwise
        ``(prediction, label, avg_cost)``.
    """
    # Batch norm must follow is_train; it was previously hard-coded to
    # training mode even when building the inference network.
    is_test = not is_train

    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)

    seq_conv1 = fluid.layers.sequence_conv(
        input=feature,
        num_filters=1024,
        filter_size=3,
        filter_stride=1,
        bias_attr=True)
    bn1 = fluid.layers.batch_norm(
        input=seq_conv1,
        act="sigmoid",
        is_test=is_test,
        momentum=0.9,
        epsilon=1e-05,
        data_layout='NCHW')

    stack_input = bn1
    for _ in range(stacked_num):
        fc = fluid.layers.fc(input=stack_input,
                             size=hidden_dim * 4,
                             bias_attr=True)
        # The cell state output is not used further down the stack.
        proj, _cell = fluid.layers.dynamic_lstmp(
            input=fc,
            size=hidden_dim * 4,
            proj_size=proj_dim,
            bias_attr=True,
            use_peepholes=True,
            is_reverse=False,
            cell_activation="tanh",
            proj_activation="tanh")
        bn = fluid.layers.batch_norm(
            input=proj,
            act="sigmoid",
            is_test=is_test,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')
        stack_input = bn

    prediction = fluid.layers.fc(input=stack_input,
                                 size=class_num,
                                 act='softmax')

    if not is_train:
        return feature, prediction

    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    return prediction, label, avg_cost
def train(args): def train(args):
if args.use_cprof: """train in loop."""
pr = cProfile.Profile()
pr.enable()
prediction, label, avg_cost = dynamic_lstmp_model( prediction, label, avg_cost = stacked_lstmp_model(
args.hidden_dim, args.proj_dim, args.stacked_num) args.hidden_dim, args.proj_dim, args.stacked_num)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
...@@ -173,7 +115,6 @@ def train(args): ...@@ -173,7 +115,6 @@ def train(args):
res_label = fluid.LoDTensor() res_label = fluid.LoDTensor()
for pass_id in xrange(args.pass_num): for pass_id in xrange(args.pass_num):
pass_start_time = time.time() pass_start_time = time.time()
words_seen = 0
accuracy.reset(exe) accuracy.reset(exe)
batch_id = 0 batch_id = 0
while True: while True:
...@@ -188,40 +129,28 @@ def train(args): ...@@ -188,40 +129,28 @@ def train(args):
res_label.set_lod([lod]) res_label.set_lod([lod])
batch_id += 1 batch_id += 1
_, acc = exe.run(fluid.default_main_program(),
words_seen += lod[-1] feed={"feature": res_feature,
"label": res_label},
loss, acc = exe.run( fetch_list=[avg_cost] + accuracy.metrics,
fluid.default_main_program(), return_numpy=False)
feed={"feature": res_feature,
"label": res_label}, if batch_id > 0 and (batch_id % args.print_per_batches == 0):
fetch_list=[avg_cost] + accuracy.metrics, print("\nBatch %d, training acc: %f" %
return_numpy=False) (batch_id, lodtensor_to_ndarray(acc)[0]))
train_acc = accuracy.eval(exe) else:
print("acc:", lodtensor_to_ndarray(loss)) sys.stdout.write('.')
sys.stdout.flush()
pass_end_time = time.time() pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time time_consumed = pass_end_time - pass_start_time
words_per_sec = words_seen / time_consumed # need to add test logic (kuke)
print("\nPass %d, time: %fs, test accuracy: 0.0f\n" %
(pass_id, time_consumed))
def lodtensor_to_ndarray(lod_tensor):
    """Copy a LoD tensor element by element into a float32 ndarray.

    Args:
        lod_tensor: An object providing ``get_dims()``,
            ``get_float_element(i)`` and ``lod()``.

    Returns:
        A ``(ndarray, lod)`` tuple: the float32 array shaped like
        ``get_dims()`` and whatever ``lod_tensor.lod()`` returns.
    """
    dims = lod_tensor.get_dims()
    ret = np.zeros(shape=dims, dtype='float32')
    # A freshly allocated array is contiguous, so reshape(-1) is a view and
    # writes through it land in ``ret``.
    flat = ret.reshape(-1)
    # range and ndarray.size work on Python 2 and 3 and on every NumPy
    # version; xrange is Python 2 only and np.product was removed in NumPy 2.
    for i in range(flat.size):
        flat[i] = lod_tensor.get_float_element(i)
    return ret, lod_tensor.lod()
if __name__ == '__main__': if __name__ == '__main__':
args = parse_args() args = parse_args()
print_arguments(args) print_arguments(args)
if args.infer_only: train(args)
pass
else:
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args)
else:
train(args)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import argparse
def print_arguments(args):
    """Print every parsed argument as a ``name: value`` line, sorted by name.

    Args:
        args: An object whose attributes are exposed through ``vars()``,
            e.g. an ``argparse`` namespace.
    """
    print('----------- Configuration Arguments -----------')
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
def lodtensor_to_ndarray(lod_tensor):
    """Copy a LoD tensor element by element into a float32 ndarray.

    Args:
        lod_tensor: An object providing ``get_dims()``,
            ``get_float_element(i)`` and ``lod()``.

    Returns:
        A ``(ndarray, lod)`` tuple: the float32 array shaped like
        ``get_dims()`` and whatever ``lod_tensor.lod()`` returns.
    """
    dims = lod_tensor.get_dims()
    ret = np.zeros(shape=dims, dtype='float32')
    # A freshly allocated array is contiguous, so reshape(-1) is a view and
    # writes through it land in ``ret``.
    flat = ret.reshape(-1)
    # range and ndarray.size work on Python 2 and 3 and on every NumPy
    # version; xrange is Python 2 only and np.product was removed in NumPy 2.
    for i in range(flat.size):
        flat[i] = lod_tensor.get_float_element(i)
    return ret, lod_tensor.lod()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册