update format and readme

bd3ad6fd · hetianjian · 9d143d04 · bd3ad6fd · bd3ad6fd · bd3ad6fd
5 changed files
--- a/fluid/PaddleRec/gnn/README.md
+++ b/fluid/PaddleRec/gnn/README.md
@@ -27,7 +27,7 @@ SR-GNN模型的介绍可以参阅论文[Session-based Recommendation with Graph

 然后通过GNN，学习每个node（item）的隐向量表示

-然后通过一个attention机制得到每个session的embedding
+然后通过一个attention架构模型得到每个session的embedding

 最后通过一个softmax层进行全表预测

@@ -50,10 +50,25 @@ cd ..
 ```
 运行之后在data文件夹下会产生diginetica文件夹，里面包含config.txt、test.txt、train.txt三个文件

+生成的数据格式为:(session_list,
+label_list)。
+
+其中session_list是一个session的列表，其中每个元素都是一个list，代表不同的session。label_list是一个列表，每个位置的元素是session_list中对应session的label。
+
+例子：session_list=[[1,2,3], [4], [7,9]]。代表这个session_list包含3个session，第一个session包含的item序列是1,2,3，第二个session只有1个item 4，第三个session包含的item序列是7，9。
+
+label_list = [6, 9,
+1]。代表[1,2,3]这个session的预测label值应该为6，后两个以此类推。
+
+提示：
+
+* 如果您想使用自己业务场景下的数据，只要令数据满足上述格式要求即可
+* 本例中的train.txt和test.txt两个文件均为二进制文件
+

 ## 训练

-可以参考下面不同场景下的运行命令就行训练，还可以指定诸如batch_size，lr等参数，具体的配置说明可通过运行下列代码查看
+可以参考下面不同场景下的运行命令进行训练，还可以指定诸如batch_size，lr(learning rate)等参数，具体的配置说明可通过运行下列代码查看
 ```
 python train.py -h
 ```

--- a/fluid/PaddleRec/gnn/infer.py
+++ b/fluid/PaddleRec/gnn/infer.py
+#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
 import argparse
 import logging
 import numpy as np

--- a/fluid/PaddleRec/gnn/network.py
+++ b/fluid/PaddleRec/gnn/network.py
+#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
 import paddle
 import math
 import numpy as np
@@ -77,17 +91,17 @@ def network(batch_size, items_num, hidden_size, step):
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]

-        state_adj_in = layers.matmul(adj_in,
-                                     state_in)  #[batch_size, uniq_max, h]
-        state_adj_out = layers.matmul(adj_out, state_out)
+        state_adj_in = layers.matmul(adj_in, state_in)  #[batch_size, uniq_max, h]
+        state_adj_out = layers.matmul(adj_out, state_out)   #[batch_size, uniq_max, h]

        gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

        gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
-        gru_fc = layers.fc(input=gru_input,
-                           name="gru_fc",
-                           size=3 * hidden_size,
-                           bias_attr=False)
+        gru_fc = layers.fc(
+            input=gru_input,
+            name="gru_fc",
+            size=3 * hidden_size,
+            bias_attr=False)
        pre_state, _, _ = fluid.layers.gru_unit(
            input=gru_fc,
            hidden=layers.reshape(
@@ -111,42 +125,44 @@ def network(batch_size, items_num, hidden_size, step):
        bias_attr=False,
        act=None,
        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, -1(seq_max), h]
-    last_fc = layers.fc(input=last,
-                        name="last_fc",
-                        size=hidden_size,
-                        bias_attr=False,
-                        act=None,
-                        num_flatten_dims=1,
-                        param_attr=fluid.ParamAttr(
-                            initializer=fluid.initializer.Uniform(
-                                low=-stdv, high=stdv)))  #[bathc_size, h]
+    last_fc = layers.fc(
+        input=last,
+        name="last_fc",
+        size=hidden_size,
+        bias_attr=False,
+        act=None,
+        num_flatten_dims=1,
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Uniform(
+            low=-stdv, high=stdv)))  #[bathc_size, h]
+
+    seq_fc_t = layers.transpose(
+        seq_fc, perm=[1, 0, 2])  #[-1(seq_max), batch_size, h]
+    add = layers.elementwise_add(
+        seq_fc_t, last_fc)  #[-1(seq_max), batch_size, h]
    b = layers.create_parameter(
        shape=[hidden_size],
        dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))  #[h]
+    add = layers.elementwise_add(add, b)  #[-1(seq_max), batch_size, h]

-    seq_fc_t = layers.transpose(
-        seq_fc, perm=[0, 2, 1])  #[batch_size, h, -1(seq_max)]
-
-    add = layers.elementwise_add(
-        seq_fc_t, last_fc, axis=0)  #[batch_size, h, -1(seq_max)]
-    add = layers.elementwise_add(add, b, axis=1)  #[batch_size, h, -1(seq_max)]
-
-    add_sigmoid = layers.sigmoid(add)  #[batch_size, h, -1(seq_max)]
+    add_sigmoid = layers.sigmoid(add) #[-1(seq_max), batch_size, h] 
    add_sigmoid = layers.transpose(
-        add_sigmoid, perm=[0, 2, 1])  #[batch_size, -1(seq_max), h]
-
-    weight = layers.fc(input=add_sigmoid,
-                       name="weight_fc",
-                       size=1,
-                       act=None,
-                       num_flatten_dims=2,
-                       bias_attr=False,
-                       param_attr=fluid.ParamAttr(
-                           initializer=fluid.initializer.Uniform(
-                               low=-stdv, high=stdv)))  #[batch_size, -1, 1]
+        add_sigmoid, perm=[1, 0, 2])  #[batch_size, -1(seq_max), h]
+
+    weight = layers.fc(
+        input=add_sigmoid,
+        name="weight_fc",
+        size=1,
+        act=None,
+        num_flatten_dims=2,
+        bias_attr=False,
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Uniform(
+                low=-stdv, high=stdv)))  #[batch_size, -1, 1]
    weight *= mask
    weight_mask = layers.elementwise_mul(seq, weight, axis=0)
    global_attention = layers.reduce_sum(weight_mask, dim=1)

--- a/fluid/PaddleRec/gnn/reader.py
+++ b/fluid/PaddleRec/gnn/reader.py
+#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
 import numpy as np
 import copy
 import random

--- a/fluid/PaddleRec/gnn/train.py
+++ b/fluid/PaddleRec/gnn/train.py
+#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
 import numpy as np
 import os
 from functools import partial