提交 ca62595a 编写于 作者: Y yinhaofeng

change

上级 bc49b2dc
......@@ -16,14 +16,9 @@ for line in lines:
text = line[0].split(" ") + line[1].split(" ")
for word in text:
if word in word_dict:
word_dict[word] = word_dict[word] + 1
continue
else:
word_dict[word] = 1
word_list = word_dict.items()
word_list = sorted(word_dict.items(), key=lambda item: item[1], reverse=True)
word_list_ids = range(1, len(word_list) + 1)
word_dict = dict(zip([x[0] for x in word_list], word_list_ids))
word_dict[word] = len(word_dict) + 1
f = open("./zhidao", "r")
lines = f.readlines()
......@@ -74,12 +69,11 @@ for query in test_query:
if query not in neg_dict:
continue
for neg in neg_dict[query]:
test_set.append([query, pos, 0])
test_set.append([query, neg, 0])
random.shuffle(test_set)
#训练集中的query,pos,neg转化为词袋
f = open("train.txt", "w")
f = open("train.txt", "w")
for line in train_set:
query = line[0].strip().split(" ")
pos = line[1].strip().split(" ")
......
......@@ -51,8 +51,8 @@ python -m paddlerec.run -m models/contentunderstanding/match-pyramid/config.yaml
### 模型效果 (测试)
| 数据集 | 模型 | auc | map |
| 数据集 | 模型 | 正逆序比 | map |
| :------------------: | :--------------------: | :---------: |:---------: |
| zhidao | DSSM | 0.55 | -- |
| zhidao | DSSM | 2.25 | -- |
| Letor07 | match-pyramid | -- | 0.42 |
| zhidao | multiview-simnet | 0.59 | -- |
| zhidao | multiview-simnet | 1.72 | -- |
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册