未验证 提交 758f7006 编写于 作者: Y Yibing Liu 提交者: GitHub

Merge pull request #1012 from kuke/train_with_lr_decay

Apply exponential decay on learning rate
......@@ -185,6 +185,9 @@ class AsyncDataReader(object):
corresponding description file.
drop_frame_len (int): Samples whose label length is above the value
will be dropped. (Assign '-1' to disable this policy)
split_sentence_threshold (int): Sentences whose length is larger than
the value will trigger the split operation.
(Assign -1 to disable splitting)
proc_num (int): Number of processes for processing data.
sample_buffer_size (int): Buffer size to indicate the maximum samples
cached.
......@@ -204,6 +207,7 @@ class AsyncDataReader(object):
feature_file_list,
label_file_list="",
drop_frame_len=512,
split_sentence_threshold=1024,
proc_num=10,
sample_buffer_size=1024,
sample_info_buffer_size=1024,
......@@ -214,6 +218,7 @@ class AsyncDataReader(object):
self._feature_file_list = feature_file_list
self._label_file_list = label_file_list
self._drop_frame_len = drop_frame_len
self._split_sentence_threshold = split_sentence_threshold
self._shuffle_block_num = shuffle_block_num
self._block_info_list = None
self._rng = random.Random(random_seed)
......@@ -262,7 +267,8 @@ class AsyncDataReader(object):
map(lambda info: info[0], bucket_block_info),
map(lambda info: info[1], bucket_block_info),
map(lambda info: info[2], bucket_block_info),
map(lambda info: info[3], bucket_block_info)))
map(lambda info: info[3], bucket_block_info),
split_sentence_threshold=self._split_sentence_threshold))
# @TODO make this configurable
def set_transformers(self, transformers):
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=0
python -u ../../tools/profile.py --feature_lst data/train_feature.lst \
--label_lst data/train_label.lst \
--mean_var data/aishell/global_mean_var \
--parallel \
--mean_var data/global_mean_var \
--frame_dim 80 \
--class_num 3040 \
--batch_size 16
export CUDA_VISIBLE_DEVICES=0,1,2,3
export CUDA_VISIBLE_DEVICES=4,5,6,7
python -u ../../train.py --train_feature_lst data/train_feature.lst \
--train_label_lst data/train_label.lst \
--val_feature_lst data/val_feature.lst \
--val_label_lst data/val_label.lst \
--mean_var data/aishell/global_mean_var \
--mean_var data/global_mean_var \
--checkpoints checkpoints \
--frame_dim 80 \
--class_num 3040 \
......@@ -11,4 +11,3 @@ python -u ../../train.py --train_feature_lst data/train_feature.lst \
--batch_size 64 \
--learning_rate 6.4e-5 \
--parallel
~
......@@ -162,7 +162,12 @@ def infer_from_ckpt(args):
infer_program = fluid.default_main_program().clone()
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=1879,
decay_rate=1 / 1.2,
staircase=True))
optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
......
......@@ -137,7 +137,12 @@ def profile(args):
class_num=args.class_num,
parallel=args.parallel)
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=1879,
decay_rate=1 / 1.2,
staircase=True))
optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
......@@ -150,7 +155,8 @@ def profile(args):
trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
]
data_reader = reader.AsyncDataReader(args.feature_lst, args.label_lst, -1)
data_reader = reader.AsyncDataReader(
args.feature_lst, args.label_lst, -1, split_sentence_threshold=1024)
data_reader.set_transformers(ltrans)
feature_t = fluid.LoDTensor()
......
......@@ -159,7 +159,12 @@ def train(args):
test_program = fluid.default_main_program().clone()
#optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9)
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=1879,
decay_rate=1 / 1.2,
staircase=True))
optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
......@@ -186,8 +191,11 @@ def train(args):
os.path.exists(args.val_label_lst)):
return -1.0, -1.0
# test data reader
test_data_reader = reader.AsyncDataReader(args.val_feature_lst,
args.val_label_lst)
test_data_reader = reader.AsyncDataReader(
args.val_feature_lst,
args.val_label_lst,
-1,
split_sentence_threshold=1024)
test_data_reader.set_transformers(ltrans)
test_costs, test_accs = [], []
for batch_id, batch_data in enumerate(
......@@ -212,8 +220,11 @@ def train(args):
return np.mean(test_costs), np.mean(test_accs)
# train data reader
train_data_reader = reader.AsyncDataReader(args.train_feature_lst,
args.train_label_lst, -1)
train_data_reader = reader.AsyncDataReader(
args.train_feature_lst,
args.train_label_lst,
-1,
split_sentence_threshold=1024)
train_data_reader.set_transformers(ltrans)
# train
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册