change

ca62595a · yinhaofeng · bc49b2dc · ca62595a · ca62595a
隐藏空白更改
内联并排

Showing with 6 addition and 12 deletion

models/match/dssm/data/preprocess.py models/match/dssm/data/preprocess.py +3 -9

models/match/readme.md models/match/readme.md +3 -3

未找到文件。
--- a/models/match/dssm/data/preprocess.py
+++ b/models/match/dssm/data/preprocess.py
@@ -16,14 +16,9 @@ for line in lines:
    text = line[0].split(" ") + line[1].split(" ")
    for word in text:
        if word in word_dict:
-            word_dict[word] = word_dict[word] + 1
+            continue
        else:
-            word_dict[word] = 1
-
-word_list = word_dict.items()
-word_list = sorted(word_dict.items(), key=lambda item: item[1], reverse=True)
-word_list_ids = range(1, len(word_list) + 1)
-word_dict = dict(zip([x[0] for x in word_list], word_list_ids))
+            word_dict[word] = len(word_dict) + 1

 f = open("./zhidao", "r")
 lines = f.readlines()
@@ -74,12 +69,11 @@ for query in test_query:
    if query not in neg_dict:
        continue
    for neg in neg_dict[query]:
-        test_set.append([query, pos, 0])
+        test_set.append([query, neg, 0])
 random.shuffle(test_set)

 #训练集中的query,pos,neg转化为词袋
 f = open("train.txt", "w")
-f = open("train.txt", "w")
 for line in train_set:
    query = line[0].strip().split(" ")
    pos = line[1].strip().split(" ")

--- a/models/match/readme.md
+++ b/models/match/readme.md
@@ -51,8 +51,8 @@ python -m paddlerec.run -m models/contentunderstanding/match-pyramid/config.yaml

 ### 模型效果 (测试)

-|       数据集        |       模型       |      auc          |       map       |  
+|       数据集        |       模型       |      正逆序比          |       map       |  
 | :------------------: | :--------------------: | :---------: |:---------: |
-|       zhidao       |       DSSM       |       0.55        |       --          | 
+|       zhidao       |       DSSM       |       2.25        |       --          | 
 |       Letor07        |       match-pyramid       |       --        |      0.42          | 
-|       zhidao        |       multiview-simnet       |       0.59        |       --          |
+|       zhidao        |       multiview-simnet       |       1.72        |       --          |