未验证 提交 758f7006 编写于 作者: Y Yibing Liu 提交者: GitHub

Merge pull request #1012 from kuke/train_with_lr_decay

Apply exponential decay on learning rate
...@@ -185,6 +185,9 @@ class AsyncDataReader(object): ...@@ -185,6 +185,9 @@ class AsyncDataReader(object):
corresponding description file. corresponding description file.
drop_frame_len (int): Samples whose label length above the value will be drop_frame_len (int): Samples whose label length above the value will be
dropped.(Using '-1' to disable the policy) dropped.(Using '-1' to disable the policy)
split_sentence_threshold (int): Sentences whose length is larger than
this value will trigger the split operation.
(Assign -1 to disable splitting.)
proc_num (int): Number of processes for processing data. proc_num (int): Number of processes for processing data.
sample_buffer_size (int): Buffer size to indicate the maximum samples sample_buffer_size (int): Buffer size to indicate the maximum samples
cached. cached.
...@@ -204,6 +207,7 @@ class AsyncDataReader(object): ...@@ -204,6 +207,7 @@ class AsyncDataReader(object):
feature_file_list, feature_file_list,
label_file_list="", label_file_list="",
drop_frame_len=512, drop_frame_len=512,
split_sentence_threshold=1024,
proc_num=10, proc_num=10,
sample_buffer_size=1024, sample_buffer_size=1024,
sample_info_buffer_size=1024, sample_info_buffer_size=1024,
...@@ -214,6 +218,7 @@ class AsyncDataReader(object): ...@@ -214,6 +218,7 @@ class AsyncDataReader(object):
self._feature_file_list = feature_file_list self._feature_file_list = feature_file_list
self._label_file_list = label_file_list self._label_file_list = label_file_list
self._drop_frame_len = drop_frame_len self._drop_frame_len = drop_frame_len
self._split_sentence_threshold = split_sentence_threshold
self._shuffle_block_num = shuffle_block_num self._shuffle_block_num = shuffle_block_num
self._block_info_list = None self._block_info_list = None
self._rng = random.Random(random_seed) self._rng = random.Random(random_seed)
...@@ -262,7 +267,8 @@ class AsyncDataReader(object): ...@@ -262,7 +267,8 @@ class AsyncDataReader(object):
map(lambda info: info[0], bucket_block_info), map(lambda info: info[0], bucket_block_info),
map(lambda info: info[1], bucket_block_info), map(lambda info: info[1], bucket_block_info),
map(lambda info: info[2], bucket_block_info), map(lambda info: info[2], bucket_block_info),
map(lambda info: info[3], bucket_block_info))) map(lambda info: info[3], bucket_block_info),
split_sentence_threshold=self._split_sentence_threshold))
# @TODO make this configurable # @TODO make this configurable
def set_transformers(self, transformers): def set_transformers(self, transformers):
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0
python -u ../../tools/profile.py --feature_lst data/train_feature.lst \ python -u ../../tools/profile.py --feature_lst data/train_feature.lst \
--label_lst data/train_label.lst \ --label_lst data/train_label.lst \
--mean_var data/aishell/global_mean_var \ --mean_var data/global_mean_var \
--parallel \
--frame_dim 80 \ --frame_dim 80 \
--class_num 3040 \ --class_num 3040 \
--batch_size 16
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=4,5,6,7
python -u ../../train.py --train_feature_lst data/train_feature.lst \ python -u ../../train.py --train_feature_lst data/train_feature.lst \
--train_label_lst data/train_label.lst \ --train_label_lst data/train_label.lst \
--val_feature_lst data/val_feature.lst \ --val_feature_lst data/val_feature.lst \
--val_label_lst data/val_label.lst \ --val_label_lst data/val_label.lst \
--mean_var data/aishell/global_mean_var \ --mean_var data/global_mean_var \
--checkpoints checkpoints \ --checkpoints checkpoints \
--frame_dim 80 \ --frame_dim 80 \
--class_num 3040 \ --class_num 3040 \
...@@ -11,4 +11,3 @@ python -u ../../train.py --train_feature_lst data/train_feature.lst \ ...@@ -11,4 +11,3 @@ python -u ../../train.py --train_feature_lst data/train_feature.lst \
--batch_size 64 \ --batch_size 64 \
--learning_rate 6.4e-5 \ --learning_rate 6.4e-5 \
--parallel --parallel
~
...@@ -162,7 +162,12 @@ def infer_from_ckpt(args): ...@@ -162,7 +162,12 @@ def infer_from_ckpt(args):
infer_program = fluid.default_main_program().clone() infer_program = fluid.default_main_program().clone()
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=1879,
decay_rate=1 / 1.2,
staircase=True))
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0) place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
......
...@@ -137,7 +137,12 @@ def profile(args): ...@@ -137,7 +137,12 @@ def profile(args):
class_num=args.class_num, class_num=args.class_num,
parallel=args.parallel) parallel=args.parallel)
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=1879,
decay_rate=1 / 1.2,
staircase=True))
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0) place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
...@@ -150,7 +155,8 @@ def profile(args): ...@@ -150,7 +155,8 @@ def profile(args):
trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5) trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
] ]
data_reader = reader.AsyncDataReader(args.feature_lst, args.label_lst, -1) data_reader = reader.AsyncDataReader(
args.feature_lst, args.label_lst, -1, split_sentence_threshold=1024)
data_reader.set_transformers(ltrans) data_reader.set_transformers(ltrans)
feature_t = fluid.LoDTensor() feature_t = fluid.LoDTensor()
......
...@@ -159,7 +159,12 @@ def train(args): ...@@ -159,7 +159,12 @@ def train(args):
test_program = fluid.default_main_program().clone() test_program = fluid.default_main_program().clone()
#optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9) #optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9)
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=1879,
decay_rate=1 / 1.2,
staircase=True))
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0) place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
...@@ -186,8 +191,11 @@ def train(args): ...@@ -186,8 +191,11 @@ def train(args):
os.path.exists(args.val_label_lst)): os.path.exists(args.val_label_lst)):
return -1.0, -1.0 return -1.0, -1.0
# test data reader # test data reader
test_data_reader = reader.AsyncDataReader(args.val_feature_lst, test_data_reader = reader.AsyncDataReader(
args.val_label_lst) args.val_feature_lst,
args.val_label_lst,
-1,
split_sentence_threshold=1024)
test_data_reader.set_transformers(ltrans) test_data_reader.set_transformers(ltrans)
test_costs, test_accs = [], [] test_costs, test_accs = [], []
for batch_id, batch_data in enumerate( for batch_id, batch_data in enumerate(
...@@ -212,8 +220,11 @@ def train(args): ...@@ -212,8 +220,11 @@ def train(args):
return np.mean(test_costs), np.mean(test_accs) return np.mean(test_costs), np.mean(test_accs)
# train data reader # train data reader
train_data_reader = reader.AsyncDataReader(args.train_feature_lst, train_data_reader = reader.AsyncDataReader(
args.train_label_lst, -1) args.train_feature_lst,
args.train_label_lst,
-1,
split_sentence_threshold=1024)
train_data_reader.set_transformers(ltrans) train_data_reader.set_transformers(ltrans)
# train # train
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册