diff --git a/fluid/deep_attention_matching_net/douban/test.sh b/fluid/deep_attention_matching_net/douban/test.sh
index caed27a2aac272df81c6b6cc7369c4f13fd0e83f..248682cea644b8a8b83620bba37a4836f537efe2 100644
--- a/fluid/deep_attention_matching_net/douban/test.sh
+++ b/fluid/deep_attention_matching_net/douban/test.sh
@@ -1,7 +1,8 @@
 export CUDA_VISIBLE_DEVICES=0,1,2,3
-python -u test_and_evaluate.py --use_cuda \
+python -u ../test_and_evaluate.py --use_cuda \
+  --ext_eval \
   --data_path ./data/data.pkl \
-  --save_path ./ \
+  --save_path ./eval_10000 \
   --model_path models/step_10000 \
   --batch_size 100 \
   --vocab_size 172130 \
diff --git a/fluid/deep_attention_matching_net/douban/train.sh b/fluid/deep_attention_matching_net/douban/train.sh
index fecc98afeaf37c4d7b4e2fa440f63904401fac6b..1642d8a9902979cc914c497343d4d09b66e4f79b 100644
--- a/fluid/deep_attention_matching_net/douban/train.sh
+++ b/fluid/deep_attention_matching_net/douban/train.sh
@@ -1,6 +1,8 @@
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -u ../train_and_evaluate.py --use_cuda \
   --data_path ./data/data.pkl \
+  --ext_eval \
+  --word_emb_init ./data/word_embedding.pkl \
   --save_path ./models \
   --batch_size 100 \
   --vocab_size 172130 \
diff --git a/fluid/deep_attention_matching_net/model.py b/fluid/deep_attention_matching_net/model.py
index ac98ccc3b3247813a3be1bdb7646a43d5cd2c5d2..dcca6a248b00d0c26432d2e3f42cfff40cada874 100644
--- a/fluid/deep_attention_matching_net/model.py
+++ b/fluid/deep_attention_matching_net/model.py
@@ -131,6 +131,6 @@ class Net(object):
         sim = fluid.layers.concat(input=sim_turns, axis=2)
 
         # for douban
-        final_info = layers.cnn_3d(sim, 16, 16)
+        final_info = layers.cnn_3d(sim, 32, 16)
         loss, logits = layers.loss(final_info, label)
         return loss, logits
diff --git a/fluid/deep_attention_matching_net/test_and_evaluate.py b/fluid/deep_attention_matching_net/test_and_evaluate.py
index d842b493819eb0f771887fa3c1428767917836a1..13450db196e1c8e2017a44ac23d29fd164a1c29f 100644
--- a/fluid/deep_attention_matching_net/test_and_evaluate.py
+++ b/fluid/deep_attention_matching_net/test_and_evaluate.py
@@ -8,7 +8,6 @@ import paddle.fluid as fluid
 import utils.reader as reader
 import cPickle as pickle
 from utils.util import print_arguments
-import utils.evaluation as eva
 from model import Net
 
 
@@ -50,6 +49,10 @@ def parse_args():
         '--use_cuda',
         action='store_true',
         help='If set, use cuda for training.')
+    parser.add_argument(
+        '--ext_eval',
+        action='store_true',
+        help='If set, use MAP, MRR, etc. for evaluation.')
     parser.add_argument(
         '--max_turn_num',
         type=int,
@@ -147,6 +150,11 @@ def test(args):
     train_data, val_data, test_data = pickle.load(open(args.data_path, 'rb'))
     print("finish loading data ...")
 
+    if args.ext_eval:
+        import utils.douban_evaluation as eva
+    else:
+        import utils.evaluation as eva
+
     test_batches = reader.build_batches(test_data, data_conf)
 
     test_batch_num = len(test_batches["response"])
diff --git a/fluid/deep_attention_matching_net/train_and_evaluate.py b/fluid/deep_attention_matching_net/train_and_evaluate.py
index 7ce7d7a375e8f976f26e207d0b474787fbd171c1..9153051d0b13a4e60b0d037d4941deba1b66f1a8 100644
--- a/fluid/deep_attention_matching_net/train_and_evaluate.py
+++ b/fluid/deep_attention_matching_net/train_and_evaluate.py
@@ -8,7 +8,6 @@ import paddle.fluid as fluid
 import utils.reader as reader
 import cPickle as pickle
 from utils.util import print_arguments
-import utils.evaluation as eva
 from model import Net
 
 
@@ -34,7 +33,7 @@ def parse_args():
     parser.add_argument(
         '--data_path',
         type=str,
-        default="data/ubuntu/data_small.pkl",
default="data/ubuntu/data_small.pkl", + default="data/data_small.pkl", help='Path to training data. (default: %(default)s)') parser.add_argument( '--save_path', @@ -45,6 +44,10 @@ def parse_args(): '--use_cuda', action='store_true', help='If set, use cuda for training.') + parser.add_argument( + '--ext_eval', + action='store_true', + help='If set, use MAP, MRR ect for evaluation.') parser.add_argument( '--max_turn_num', type=int, @@ -74,7 +77,7 @@ def parse_args(): '--_EOS_', type=int, default=28270, - help='The id for end of sentence in vocabulary.') + help='The id for the end of sentence in vocabulary.') parser.add_argument( '--stack_num', type=int, @@ -140,9 +143,15 @@ def train(args): main_program=test_program, share_vars_from=train_exe) + if args.ext_eval: + import utils.douban_evaluation as eva + else: + import utils.evaluation as eva + if args.word_emb_init is not None: print("start loading word embedding init ...") - word_emb = pickle.load(open(args.word_emb_init, 'rb')).astype('float32') + word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'))).astype( + 'float32') print("finish loading word embedding init ...") print("start loading data ...") diff --git a/fluid/deep_attention_matching_net/ubuntu/train.sh b/fluid/deep_attention_matching_net/ubuntu/train.sh index 78e866e832b0d064669b7ef2cfa6810cd3c01f4f..446fbf749c63787611a4e4defe9cc95045f2b408 100644 --- a/fluid/deep_attention_matching_net/ubuntu/train.sh +++ b/fluid/deep_attention_matching_net/ubuntu/train.sh @@ -1,6 +1,7 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3 python -u ../train_and_evaluate.py --use_cuda \ --data_path ./data/data.pkl \ + --word_emb_init ./data/word_embedding.pkl \ --save_path ./models \ --batch_size 100 \ --vocab_size 434512 \ diff --git a/fluid/deep_attention_matching_net/utils/douban_evaluation.py b/fluid/deep_attention_matching_net/utils/douban_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..4d8fe400f03b96a4d785ab376eb8c1aac61ce111 --- /dev/null +++ b/fluid/deep_attention_matching_net/utils/douban_evaluation.py @@ -0,0 +1,85 @@ +import sys +import numpy as np +from sklearn.metrics import average_precision_score + + +def mean_average_precision(sort_data): + #to do + count_1 = 0 + sum_precision = 0 + for index in range(len(sort_data)): + if sort_data[index][1] == 1: + count_1 += 1 + sum_precision += 1.0 * count_1 / (index + 1) + return sum_precision / count_1 + + +def mean_reciprocal_rank(sort_data): + sort_lable = [s_d[1] for s_d in sort_data] + assert 1 in sort_lable + return 1.0 / (1 + sort_lable.index(1)) + + +def precision_at_position_1(sort_data): + if sort_data[0][1] == 1: + return 1 + else: + return 0 + + +def recall_at_position_k_in_10(sort_data, k): + sort_lable = [s_d[1] for s_d in sort_data] + select_lable = sort_lable[:k] + return 1.0 * select_lable.count(1) / sort_lable.count(1) + + +def evaluation_one_session(data): + sort_data = sorted(data, key=lambda x: x[0], reverse=True) + m_a_p = mean_average_precision(sort_data) + m_r_r = mean_reciprocal_rank(sort_data) + p_1 = precision_at_position_1(sort_data) + r_1 = recall_at_position_k_in_10(sort_data, 1) + r_2 = recall_at_position_k_in_10(sort_data, 2) + r_5 = recall_at_position_k_in_10(sort_data, 5) + return m_a_p, m_r_r, p_1, r_1, r_2, r_5 + + +def evaluate(file_path): + sum_m_a_p = 0 + sum_m_r_r = 0 + sum_p_1 = 0 + sum_r_1 = 0 + sum_r_2 = 0 + sum_r_5 = 0 + i = 0 + total_num = 0 + with open(file_path, 'r') as infile: + for line in infile: + if i % 10 == 0: + data = [] + + tokens = line.strip().split('\t') 
+            data.append((float(tokens[0]), int(tokens[1])))
+            if i % 10 == 9:
+                total_num += 1
+                m_a_p, m_r_r, p_1, r_1, r_2, r_5 = evaluation_one_session(data)
+                sum_m_a_p += m_a_p
+                sum_m_r_r += m_r_r
+                sum_p_1 += p_1
+                sum_r_1 += r_1
+                sum_r_2 += r_2
+                sum_r_5 += r_5
+            i += 1
+
+    #print('total num: %s' %total_num)
+    #print('MAP: %s' %(1.0*sum_m_a_p/total_num))
+    #print('MRR: %s' %(1.0*sum_m_r_r/total_num))
+    #print('P@1: %s' %(1.0*sum_p_1/total_num))
+    return (1.0 * sum_m_a_p / total_num, 1.0 * sum_m_r_r / total_num,
+            1.0 * sum_p_1 / total_num, 1.0 * sum_r_1 / total_num,
+            1.0 * sum_r_2 / total_num, 1.0 * sum_r_5 / total_num)
+
+
+if __name__ == '__main__':
+    result = evaluate(sys.argv[1])
+    for r in result:
+        print(r)