diff --git a/fluid/PaddleNLP/Senta b/fluid/PaddleNLP/Senta
deleted file mode 160000
index 870651e257750f2c237f0b0bc9a27e5d062d1909..0000000000000000000000000000000000000000
--- a/fluid/PaddleNLP/Senta
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 870651e257750f2c237f0b0bc9a27e5d062d1909
diff --git a/fluid/PaddleNLP/SimNet b/fluid/PaddleNLP/SimNet
deleted file mode 160000
index 4dbe7f7b0e76c188eb7f448d104f0165f0a12229..0000000000000000000000000000000000000000
--- a/fluid/PaddleNLP/SimNet
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 4dbe7f7b0e76c188eb7f448d104f0165f0a12229
diff --git a/fluid/PaddleNLP/machine_reading_comprehension/run.py b/fluid/PaddleNLP/machine_reading_comprehension/run.py
index 0ab05b9052c8d54d036d625ccd05f743ae47781a..8871ab9998942d92516035b78d5e327d4e2271ef 100644
--- a/fluid/PaddleNLP/machine_reading_comprehension/run.py
+++ b/fluid/PaddleNLP/machine_reading_comprehension/run.py
@@ -312,6 +312,15 @@ def validation(inference_program, avg_cost, s_probs, e_probs, match, feed_order,
     return ave_loss, bleu_rouge
 
 
+def l2_loss(train_prog):
+    param_list = train_prog.block(0).all_parameters()
+    para_sum = []
+    for para in param_list:
+        para_mul = fluid.layers.elementwise_mul(x=para, y=para, axis=0)
+        para_sum.append(fluid.layers.reduce_sum(input=para_mul, dim=None))
+    return fluid.layers.sums(para_sum) * 0.5
+
+
 def train(logger, args):
     logger.info('Load data_set and vocab...')
     with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
@@ -349,24 +358,22 @@
     # build optimizer
     if args.optim == 'sgd':
         optimizer = fluid.optimizer.SGD(
-            learning_rate=args.learning_rate,
-            regularization=fluid.regularizer.L2DecayRegularizer(
-                regularization_coeff=args.weight_decay))
+            learning_rate=args.learning_rate)
     elif args.optim == 'adam':
         optimizer = fluid.optimizer.Adam(
-            learning_rate=args.learning_rate,
-            regularization=fluid.regularizer.L2DecayRegularizer(
-                regularization_coeff=args.weight_decay))
-
+            learning_rate=args.learning_rate)
     elif args.optim == 'rprop':
         optimizer = fluid.optimizer.RMSPropOptimizer(
-            learning_rate=args.learning_rate,
-            regularization=fluid.regularizer.L2DecayRegularizer(
-                regularization_coeff=args.weight_decay))
+            learning_rate=args.learning_rate)
     else:
         logger.error('Unsupported optimizer: {}'.format(args.optim))
         exit(-1)
-    optimizer.minimize(avg_cost)
+    if args.weight_decay > 0.0:
+        obj_func = avg_cost + args.weight_decay * l2_loss(main_program)
+        optimizer.minimize(obj_func)
+    else:
+        obj_func = avg_cost
+        optimizer.minimize(obj_func)
 
     # initialize parameters
     place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
@@ -406,7 +413,7 @@
             feed_data = batch_reader(batch_list, args)
             fetch_outs = parallel_executor.run(
                 feed=list(feeder.feed_parallel(feed_data, dev_count)),
-                fetch_list=[avg_cost.name],
+                fetch_list=[obj_func.name],
                 return_numpy=False)
             cost_train = np.array(fetch_outs[0]).mean()
             total_num += args.batch_size * dev_count
diff --git a/fluid/PaddleNLP/machine_reading_comprehension/run.sh b/fluid/PaddleNLP/machine_reading_comprehension/run.sh
index a8241d05c2b6dca7915f2f6164f26e3e5938ab73..cc381c9ecdcfe1b547e0c11fe8d7fe6149248b21 100644
--- a/fluid/PaddleNLP/machine_reading_comprehension/run.sh
+++ b/fluid/PaddleNLP/machine_reading_comprehension/run.sh
@@ -18,5 +18,5 @@ python run.py \
 --max_p_len 500 \
 --max_q_len 60 \
 --max_a_len 200 \
---weight_decay 0.0 \
+--weight_decay 0.0001 \
 --drop_rate 0.2 $@\