Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
2e093390
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2e093390
编写于
5月 29, 2020
作者:
F
frankwhzhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ssr
上级
f4cb25b4
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
150 addition
and
142 deletion
+150
-142
models/recall/ssr/config.yaml
models/recall/ssr/config.yaml
+46
-34
models/recall/ssr/model.py
models/recall/ssr/model.py
+104
-108
未找到文件。
models/recall/ssr/config.yaml
浏览文件 @
2e093390
...
...
@@ -12,43 +12,55 @@
# See the License for the specific language governing permissions and
# limitations under the License.
workspace
:
"
paddlerec.models.recall.ssr"
evaluate
:
reader
:
batch_size
:
1
class
:
"
{workspace}/ssr_infer_reader.py"
test_data_path
:
"
{workspace}/data/train"
is_return_numpy
:
True
dataset
:
-
name
:
dataset_train
batch_size
:
5
type
:
QueueDataset
data_path
:
"
{workspace}/data/train"
data_converter
:
"
{workspace}/ssr_reader.py"
-
name
:
dataset_infer
batch_size
:
5
type
:
QueueDataset
data_path
:
"
{workspace}/data/test"
data_converter
:
"
{workspace}/ssr_infer_reader.py"
train
:
trainer
:
# for cluster training
strategy
:
"
async"
hyper_parameters
:
vocab_size
:
1000
emb_dim
:
128
hidden_size
:
100
optimizer
:
class
:
adagrad
learning_rate
:
0.01
strategy
:
async
#use infer_runner mode and modify 'phase' below if infer
mode
:
train_runner
#mode: infer_runner
runner
:
-
name
:
train_runner
class
:
single_train
device
:
cpu
epochs
:
3
workspace
:
"
paddlerec.models.recall.ssr"
save_checkpoint_interval
:
2
save_inference_interval
:
4
save_checkpoint_path
:
"
increment"
save_inference_path
:
"
inference"
print_interval
:
10
-
name
:
infer_runner
class
:
single_infer
init_model_path
:
"
increment/0"
device
:
cpu
epochs
:
3
reader
:
batch_size
:
5
class
:
"
{workspace}/ssr_reader.py"
train_data_path
:
"
{workspace}/data/train"
model
:
models
:
"
{workspace}/model.py"
hyper_parameters
:
vocab_size
:
1000
emb_dim
:
128
hidden_size
:
100
learning_rate
:
0.01
optimizer
:
adagrad
save
:
increment
:
dirname
:
"
increment"
epoch_interval
:
2
save_last
:
True
inference
:
dirname
:
"
inference"
epoch_interval
:
4
save_last
:
True
phase
:
-
name
:
train
model
:
"
{workspace}/model.py"
dataset_name
:
dataset_train
thread_num
:
1
#- name: infer
# model: "{workspace}/model.py"
# dataset_name: dataset_infer
# thread_num: 1
models/recall/ssr/model.py
浏览文件 @
2e093390
...
...
@@ -20,85 +20,45 @@ from paddlerec.core.utils import envs
from
paddlerec.core.model
import
Model
as
ModelBase
class
BowEncoder
(
object
):
""" bow-encoder """
def
__init__
(
self
):
self
.
param_name
=
""
def
forward
(
self
,
emb
):
return
fluid
.
layers
.
sequence_pool
(
input
=
emb
,
pool_type
=
'sum'
)
class
GrnnEncoder
(
object
):
""" grnn-encoder """
def
__init__
(
self
,
param_name
=
"grnn"
,
hidden_size
=
128
):
self
.
param_name
=
param_name
self
.
hidden_size
=
hidden_size
def
forward
(
self
,
emb
):
fc0
=
fluid
.
layers
.
fc
(
input
=
emb
,
size
=
self
.
hidden_size
*
3
,
param_attr
=
self
.
param_name
+
"_fc.w"
,
bias_attr
=
False
)
gru_h
=
fluid
.
layers
.
dynamic_gru
(
input
=
fc0
,
size
=
self
.
hidden_size
,
is_reverse
=
False
,
param_attr
=
self
.
param_name
+
".param"
,
bias_attr
=
self
.
param_name
+
".bias"
)
return
fluid
.
layers
.
sequence_pool
(
input
=
gru_h
,
pool_type
=
'max'
)
class
PairwiseHingeLoss
(
object
):
def
__init__
(
self
,
margin
=
0.8
):
self
.
margin
=
margin
def
forward
(
self
,
pos
,
neg
):
loss_part1
=
fluid
.
layers
.
elementwise_sub
(
tensor
.
fill_constant_batch_size_like
(
input
=
pos
,
shape
=
[
-
1
,
1
],
value
=
self
.
margin
,
dtype
=
'float32'
),
pos
)
loss_part2
=
fluid
.
layers
.
elementwise_add
(
loss_part1
,
neg
)
loss_part3
=
fluid
.
layers
.
elementwise_max
(
tensor
.
fill_constant_batch_size_like
(
input
=
loss_part2
,
shape
=
[
-
1
,
1
],
value
=
0.0
,
dtype
=
'float32'
),
loss_part2
)
return
loss_part3
class
Model
(
ModelBase
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
def
get_correct
(
self
,
x
,
y
):
less
=
tensor
.
cast
(
cf
.
less_than
(
x
,
y
),
dtype
=
'float32'
)
correct
=
fluid
.
layers
.
reduce_sum
(
less
)
return
correct
def
train
(
self
):
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.emb_dim"
,
None
,
self
.
_namespace
)
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
emb_shape
=
[
vocab_size
,
emb_dim
]
def
_init_hyper_parameters
(
self
):
self
.
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
)
self
.
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.emb_dim"
)
self
.
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
)
def
input_data
(
self
,
is_infer
=
False
,
**
kwargs
):
if
is_infer
:
user_data
=
fluid
.
data
(
name
=
"user"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
all_item_data
=
fluid
.
data
(
name
=
"all_item"
,
shape
=
[
None
,
self
.
vocab_size
],
dtype
=
"int64"
)
pos_label
=
fluid
.
data
(
name
=
"pos_label"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
)
return
[
user_data
,
all_item_data
,
pos_label
]
else
:
user_data
=
fluid
.
data
(
name
=
"user"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
pos_item_data
=
fluid
.
data
(
name
=
"p_item"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
neg_item_data
=
fluid
.
data
(
name
=
"n_item"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
return
[
user_data
,
pos_item_data
,
neg_item_data
]
def
net
(
self
,
inputs
,
is_infer
=
False
):
if
is_infer
:
self
.
_infer_net
(
inputs
)
return
user_data
=
inputs
[
0
]
pos_item_data
=
inputs
[
1
]
neg_item_data
=
inputs
[
2
]
emb_shape
=
[
self
.
vocab_size
,
self
.
emb_dim
]
self
.
user_encoder
=
GrnnEncoder
()
self
.
item_encoder
=
BowEncoder
()
self
.
pairwise_hinge_loss
=
PairwiseHingeLoss
()
user_data
=
fluid
.
data
(
name
=
"user"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
pos_item_data
=
fluid
.
data
(
name
=
"p_item"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
neg_item_data
=
fluid
.
data
(
name
=
"n_item"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
self
.
_data_var
.
extend
([
user_data
,
pos_item_data
,
neg_item_data
])
user_emb
=
fluid
.
embedding
(
input
=
user_data
,
size
=
emb_shape
,
param_attr
=
"emb.item"
)
pos_item_emb
=
fluid
.
embedding
(
...
...
@@ -109,79 +69,115 @@ class Model(ModelBase):
pos_item_enc
=
self
.
item_encoder
.
forward
(
pos_item_emb
)
neg_item_enc
=
self
.
item_encoder
.
forward
(
neg_item_emb
)
user_hid
=
fluid
.
layers
.
fc
(
input
=
user_enc
,
size
=
hidden_size
,
size
=
self
.
hidden_size
,
param_attr
=
'user.w'
,
bias_attr
=
"user.b"
)
pos_item_hid
=
fluid
.
layers
.
fc
(
input
=
pos_item_enc
,
size
=
hidden_size
,
size
=
self
.
hidden_size
,
param_attr
=
'item.w'
,
bias_attr
=
"item.b"
)
neg_item_hid
=
fluid
.
layers
.
fc
(
input
=
neg_item_enc
,
size
=
hidden_size
,
size
=
self
.
hidden_size
,
param_attr
=
'item.w'
,
bias_attr
=
"item.b"
)
cos_pos
=
fluid
.
layers
.
cos_sim
(
user_hid
,
pos_item_hid
)
cos_neg
=
fluid
.
layers
.
cos_sim
(
user_hid
,
neg_item_hid
)
hinge_loss
=
self
.
pairwise_hinge_loss
.
forward
(
cos_pos
,
cos_neg
)
avg_cost
=
fluid
.
layers
.
mean
(
hinge_loss
)
correct
=
self
.
get_correct
(
cos_neg
,
cos_pos
)
correct
=
self
.
_
get_correct
(
cos_neg
,
cos_pos
)
self
.
_cost
=
avg_cost
self
.
_metrics
[
"correct"
]
=
correct
self
.
_metrics
[
"hinge_loss"
]
=
hinge_loss
def
train_net
(
self
):
self
.
train
()
def
infer
(
self
):
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.emb_dim"
,
None
,
self
.
_namespace
)
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
user_data
=
fluid
.
data
(
name
=
"user"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
all_item_data
=
fluid
.
data
(
name
=
"all_item"
,
shape
=
[
None
,
vocab_size
],
dtype
=
"int64"
)
pos_label
=
fluid
.
data
(
name
=
"pos_label"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
)
self
.
_infer_data_var
=
[
user_data
,
all_item_data
,
pos_label
]
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
def
_infer_net
(
self
,
inputs
):
user_data
=
inputs
[
0
]
all_item_data
=
inputs
[
1
]
pos_label
=
inputs
[
2
]
user_emb
=
fluid
.
embedding
(
input
=
user_data
,
size
=
[
vocab_size
,
emb_dim
],
param_attr
=
"emb.item"
)
input
=
user_data
,
size
=
[
self
.
vocab_size
,
self
.
emb_dim
],
param_attr
=
"emb.item"
)
all_item_emb
=
fluid
.
embedding
(
input
=
all_item_data
,
size
=
[
vocab_size
,
emb_dim
],
size
=
[
self
.
vocab_size
,
self
.
emb_dim
],
param_attr
=
"emb.item"
)
all_item_emb_re
=
fluid
.
layers
.
reshape
(
x
=
all_item_emb
,
shape
=
[
-
1
,
emb_dim
])
x
=
all_item_emb
,
shape
=
[
-
1
,
self
.
emb_dim
])
user_encoder
=
GrnnEncoder
()
user_enc
=
user_encoder
.
forward
(
user_emb
)
user_hid
=
fluid
.
layers
.
fc
(
input
=
user_enc
,
size
=
hidden_size
,
size
=
self
.
hidden_size
,
param_attr
=
'user.w'
,
bias_attr
=
"user.b"
)
user_exp
=
fluid
.
layers
.
expand
(
x
=
user_hid
,
expand_times
=
[
1
,
vocab_size
])
user_re
=
fluid
.
layers
.
reshape
(
x
=
user_exp
,
shape
=
[
-
1
,
hidden_size
])
x
=
user_hid
,
expand_times
=
[
1
,
self
.
vocab_size
])
user_re
=
fluid
.
layers
.
reshape
(
x
=
user_exp
,
shape
=
[
-
1
,
self
.
hidden_size
])
all_item_hid
=
fluid
.
layers
.
fc
(
input
=
all_item_emb_re
,
size
=
hidden_size
,
size
=
self
.
hidden_size
,
param_attr
=
'item.w'
,
bias_attr
=
"item.b"
)
cos_item
=
fluid
.
layers
.
cos_sim
(
X
=
all_item_hid
,
Y
=
user_re
)
all_pre_
=
fluid
.
layers
.
reshape
(
x
=
cos_item
,
shape
=
[
-
1
,
vocab_size
])
all_pre_
=
fluid
.
layers
.
reshape
(
x
=
cos_item
,
shape
=
[
-
1
,
self
.
vocab_size
])
acc
=
fluid
.
layers
.
accuracy
(
input
=
all_pre_
,
label
=
pos_label
,
k
=
20
)
self
.
_infer_results
[
'recall20'
]
=
acc
def
infer_net
(
self
):
self
.
infer
()
def
_get_correct
(
self
,
x
,
y
):
less
=
tensor
.
cast
(
cf
.
less_than
(
x
,
y
),
dtype
=
'float32'
)
correct
=
fluid
.
layers
.
reduce_sum
(
less
)
return
correct
class
BowEncoder
(
object
):
""" bow-encoder """
def
__init__
(
self
):
self
.
param_name
=
""
def
forward
(
self
,
emb
):
return
fluid
.
layers
.
sequence_pool
(
input
=
emb
,
pool_type
=
'sum'
)
class
GrnnEncoder
(
object
):
""" grnn-encoder """
def
__init__
(
self
,
param_name
=
"grnn"
,
hidden_size
=
128
):
self
.
param_name
=
param_name
self
.
hidden_size
=
hidden_size
def
forward
(
self
,
emb
):
fc0
=
fluid
.
layers
.
fc
(
input
=
emb
,
size
=
self
.
hidden_size
*
3
,
param_attr
=
self
.
param_name
+
"_fc.w"
,
bias_attr
=
False
)
gru_h
=
fluid
.
layers
.
dynamic_gru
(
input
=
fc0
,
size
=
self
.
hidden_size
,
is_reverse
=
False
,
param_attr
=
self
.
param_name
+
".param"
,
bias_attr
=
self
.
param_name
+
".bias"
)
return
fluid
.
layers
.
sequence_pool
(
input
=
gru_h
,
pool_type
=
'max'
)
class
PairwiseHingeLoss
(
object
):
def
__init__
(
self
,
margin
=
0.8
):
self
.
margin
=
margin
def
forward
(
self
,
pos
,
neg
):
loss_part1
=
fluid
.
layers
.
elementwise_sub
(
tensor
.
fill_constant_batch_size_like
(
input
=
pos
,
shape
=
[
-
1
,
1
],
value
=
self
.
margin
,
dtype
=
'float32'
),
pos
)
loss_part2
=
fluid
.
layers
.
elementwise_add
(
loss_part1
,
neg
)
loss_part3
=
fluid
.
layers
.
elementwise_max
(
tensor
.
fill_constant_batch_size_like
(
input
=
loss_part2
,
shape
=
[
-
1
,
1
],
value
=
0.0
,
dtype
=
'float32'
),
loss_part2
)
return
loss_part3
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录