Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
efcd1c08
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
efcd1c08
编写于
5月 22, 2020
作者:
F
frankwhzhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix model style
上级
ee6bd53b
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
78 addition
and
81 deletion
+78
-81
core/model.py
core/model.py
+22
-6
models/rerank/listwise/model.py
models/rerank/listwise/model.py
+54
-65
models/rerank/listwise/random_infer_reader.py
models/rerank/listwise/random_infer_reader.py
+1
-5
models/rerank/listwise/random_reader.py
models/rerank/listwise/random_reader.py
+1
-5
未找到文件。
core/model.py
浏览文件 @
efcd1c08
...
@@ -133,12 +133,28 @@ class Model(object):
...
@@ -133,12 +133,28 @@ class Model(object):
print
(
">>>>>>>>>>>.learnig rate: %s"
%
learning_rate
)
print
(
">>>>>>>>>>>.learnig rate: %s"
%
learning_rate
)
return
self
.
_build_optimizer
(
optimizer
,
learning_rate
)
return
self
.
_build_optimizer
(
optimizer
,
learning_rate
)
@
abc
.
abstractmethod
def
input_data
(
self
,
is_infer
=
False
):
return
None
def
net
(
self
,
is_infer
=
False
):
return
None
def
train_net
(
self
):
def
train_net
(
self
):
"""R
input_data
=
self
.
input_data
(
is_infer
=
False
)
"""
self
.
_data_var
=
input_data
pass
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
self
.
net
(
input_data
,
is_infer
=
False
)
@
abc
.
abstractmethod
def
infer_net
(
self
):
def
infer_net
(
self
):
pass
input_data
=
self
.
input_data
(
is_infer
=
True
)
self
.
_infer_data_var
=
input_data
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
self
.
net
(
input_data
,
is_infer
=
True
)
models/rerank/listwise/model.py
浏览文件 @
efcd1c08
...
@@ -56,67 +56,11 @@ class Model(ModelBase):
...
@@ -56,67 +56,11 @@ class Model(ModelBase):
inputs
=
[
user_slot_names
]
+
[
item_slot_names
]
+
[
lens
]
+
[
labels
]
inputs
=
[
user_slot_names
]
+
[
item_slot_names
]
+
[
lens
]
+
[
labels
]
# demo: hot to use is_infer:
if
is_infer
:
if
is_infer
:
self
.
_infer_data_var
=
inputs
return
inputs
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
else
:
else
:
self
.
_data_var
=
inputs
return
inputs
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
return
inputs
def
_fluid_sequence_pad
(
self
,
input
,
pad_value
,
maxlen
=
None
):
"""
args:
input: (batch*seq_len, dim)
returns:
(batch, max_seq_len, dim)
"""
pad_value
=
fluid
.
layers
.
cast
(
fluid
.
layers
.
assign
(
input
=
np
.
array
([
pad_value
],
'float32'
)),
input
.
dtype
)
input_padded
,
_
=
fluid
.
layers
.
sequence_pad
(
input
,
pad_value
,
maxlen
=
maxlen
)
# (batch, max_seq_len, 1), (batch, 1)
# TODO, maxlen=300, used to solve issues: https://github.com/PaddlePaddle/Paddle/issues/14164
return
input_padded
def
_fluid_sequence_get_pos
(
self
,
lodtensor
):
"""
args:
lodtensor: lod = [[0,4,7]]
return:
pos: lod = [[0,4,7]]
data = [0,1,2,3,0,1,3]
shape = [-1, 1]
"""
lodtensor
=
fluid
.
layers
.
reduce_sum
(
lodtensor
,
dim
=
1
,
keep_dim
=
True
)
assert
lodtensor
.
shape
==
(
-
1
,
1
),
(
lodtensor
.
shape
())
ones
=
fluid
.
layers
.
cast
(
lodtensor
*
0
+
1
,
'float32'
)
# (batch*seq_len, 1)
ones_padded
=
self
.
_fluid_sequence_pad
(
ones
,
0
)
# (batch, max_seq_len, 1)
ones_padded
=
fluid
.
layers
.
squeeze
(
ones_padded
,
[
2
])
# (batch, max_seq_len)
seq_len
=
fluid
.
layers
.
cast
(
fluid
.
layers
.
reduce_sum
(
ones_padded
,
1
,
keep_dim
=
True
),
'int64'
)
# (batch, 1)
seq_len
=
fluid
.
layers
.
squeeze
(
seq_len
,
[
1
])
pos
=
fluid
.
layers
.
cast
(
fluid
.
layers
.
cumsum
(
ones_padded
,
1
,
exclusive
=
True
),
'int64'
)
pos
=
fluid
.
layers
.
sequence_unpad
(
pos
,
seq_len
)
# (batch*seq_len, 1)
pos
.
stop_gradient
=
True
return
pos
def
net
(
self
,
inputs
,
is_infer
=
False
):
def
net
(
self
,
inputs
,
is_infer
=
False
):
# user encode
# user encode
...
@@ -225,10 +169,55 @@ class Model(ModelBase):
...
@@ -225,10 +169,55 @@ class Model(ModelBase):
self
.
_cost
=
loss
self
.
_cost
=
loss
self
.
_metrics
[
'auc'
]
=
auc_val
self
.
_metrics
[
'auc'
]
=
auc_val
def
train_net
(
self
):
def
_fluid_sequence_pad
(
self
,
input
,
pad_value
,
maxlen
=
None
):
input_data
=
self
.
input_data
()
"""
self
.
net
(
input_data
)
args:
input: (batch*seq_len, dim)
returns:
(batch, max_seq_len, dim)
"""
pad_value
=
fluid
.
layers
.
cast
(
fluid
.
layers
.
assign
(
input
=
np
.
array
([
pad_value
],
'float32'
)),
input
.
dtype
)
input_padded
,
_
=
fluid
.
layers
.
sequence_pad
(
input
,
pad_value
,
maxlen
=
maxlen
)
# (batch, max_seq_len, 1), (batch, 1)
# TODO, maxlen=300, used to solve issues: https://github.com/PaddlePaddle/Paddle/issues/14164
return
input_padded
def
_fluid_sequence_get_pos
(
self
,
lodtensor
):
"""
args:
lodtensor: lod = [[0,4,7]]
return:
pos: lod = [[0,4,7]]
data = [0,1,2,3,0,1,3]
shape = [-1, 1]
"""
lodtensor
=
fluid
.
layers
.
reduce_sum
(
lodtensor
,
dim
=
1
,
keep_dim
=
True
)
assert
lodtensor
.
shape
==
(
-
1
,
1
),
(
lodtensor
.
shape
())
ones
=
fluid
.
layers
.
cast
(
lodtensor
*
0
+
1
,
'float32'
)
# (batch*seq_len, 1)
ones_padded
=
self
.
_fluid_sequence_pad
(
ones
,
0
)
# (batch, max_seq_len, 1)
ones_padded
=
fluid
.
layers
.
squeeze
(
ones_padded
,
[
2
])
# (batch, max_seq_len)
seq_len
=
fluid
.
layers
.
cast
(
fluid
.
layers
.
reduce_sum
(
ones_padded
,
1
,
keep_dim
=
True
),
'int64'
)
# (batch, 1)
seq_len
=
fluid
.
layers
.
squeeze
(
seq_len
,
[
1
])
pos
=
fluid
.
layers
.
cast
(
fluid
.
layers
.
cumsum
(
ones_padded
,
1
,
exclusive
=
True
),
'int64'
)
pos
=
fluid
.
layers
.
sequence_unpad
(
pos
,
seq_len
)
# (batch*seq_len, 1)
pos
.
stop_gradient
=
True
return
pos
#def train_net(self):
# input_data = self.input_data()
# self.net(input_data)
def
infer_net
(
self
):
#
def infer_net(self):
input_data
=
self
.
input_data
(
is_infer
=
True
)
#
input_data = self.input_data(is_infer=True)
self
.
net
(
input_data
,
is_infer
=
True
)
#
self.net(input_data, is_infer=True)
models/rerank/listwise/random_infer_reader.py
浏览文件 @
efcd1c08
...
@@ -44,11 +44,7 @@ class EvaluateReader(Reader):
...
@@ -44,11 +44,7 @@ class EvaluateReader(Reader):
length
=
[
self
.
item_len
]
*
self
.
batch_size
length
=
[
self
.
item_len
]
*
self
.
batch_size
label
=
np
.
random
.
randint
(
label
=
np
.
random
.
randint
(
2
,
size
=
(
self
.
batch_size
,
self
.
item_len
)).
tolist
()
2
,
size
=
(
self
.
batch_size
,
self
.
item_len
)).
tolist
()
output
=
[]
output
=
[
user_slot_name
,
item_slot_name
,
length
,
label
]
output
.
append
(
user_slot_name
)
output
.
append
(
item_slot_name
)
output
.
append
(
length
)
output
.
append
(
label
)
yield
output
yield
output
...
...
models/rerank/listwise/random_reader.py
浏览文件 @
efcd1c08
...
@@ -44,11 +44,7 @@ class TrainReader(Reader):
...
@@ -44,11 +44,7 @@ class TrainReader(Reader):
length
=
[
self
.
item_len
]
*
self
.
batch_size
length
=
[
self
.
item_len
]
*
self
.
batch_size
label
=
np
.
random
.
randint
(
label
=
np
.
random
.
randint
(
2
,
size
=
(
self
.
batch_size
,
self
.
item_len
)).
tolist
()
2
,
size
=
(
self
.
batch_size
,
self
.
item_len
)).
tolist
()
output
=
[]
output
=
[
user_slot_name
,
item_slot_name
,
length
,
label
]
output
.
append
(
user_slot_name
)
output
.
append
(
item_slot_name
)
output
.
append
(
length
)
output
.
append
(
label
)
yield
output
yield
output
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录