Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
5f4af11a
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5f4af11a
编写于
9月 07, 2021
作者:
F
Fan Zhang
提交者:
GitHub
9月 07, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[CPU-PSLIB] Add consistency inspection of use_var_list and data_generator data (#34988)
上级
0ed9b051
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
471 addition
and
0 deletion
+471
-0
python/paddle/fluid/dataset.py
python/paddle/fluid/dataset.py
+65
-0
python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py
...id/tests/unittests/test_dataset_consistency_inspection.py
+406
-0
未找到文件。
python/paddle/fluid/dataset.py
浏览文件 @
5f4af11a
...
...
@@ -321,6 +321,71 @@ class DatasetBase(object):
def _dynamic_adjust_after_train(self):
    # Base-class hook: a deliberate no-op here. Subclasses may override it;
    # only the empty body is visible in this diff hunk.
    pass
def
check_use_var_with_data_generator
(
self
,
var_list
,
data_generator_class
,
test_file
):
"""
Var consistency insepection of use_var_list and data_generator data.
Examples:
.. code-block:: python
# required: skiptest
import paddle.fluid as fluid
from dataset_generator import CTRDataset
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
generator_class = CTRDataset()
dataset.check_use_var_with_data_generator([data, label], generator_class, "data/part-00000")
Args:
var_list(list): variable list
data_generator_class(class): data_generator class
test_file(str): local test file path
"""
f
=
open
(
test_file
,
"r"
)
var_len
=
len
(
var_list
)
while
True
:
line
=
f
.
readline
()
if
line
:
line_iter
=
data_generator_class
.
generate_sample
(
line
)
for
user_parsed_line
in
line_iter
():
data_gen_len
=
len
(
user_parsed_line
)
if
var_len
!=
data_gen_len
:
raise
ValueError
(
"var length mismatch error: var_list = %s vs data_generator = %s"
%
(
var_len
,
data_gen_len
))
for
i
,
ele
in
enumerate
(
user_parsed_line
):
if
len
(
ele
[
1
])
==
0
:
raise
ValueError
(
"var length error: var %s's length in data_generator is 0"
%
ele
[
0
])
if
var_list
[
i
].
dtype
==
core
.
VarDesc
.
VarType
.
FP32
and
not
all
(
isinstance
(
ele
,
float
)
for
ele
in
ele
[
1
]):
raise
TypeError
(
"var dtype mismatch error: var name = %s, var type in var_list = %s, while var in data_generator contains non-float value, which is %s
\n
"
"Please check if order of var_list and data_generator are aligned.
\n
"
"Please check if var's type in data_generator is correct."
%
(
ele
[
0
],
"float"
,
ele
[
1
]))
if
(
var_list
[
i
].
dtype
==
core
.
VarDesc
.
VarType
.
INT64
or
var_list
[
i
].
dtype
==
core
.
VarDesc
.
VarType
.
INT32
)
and
not
all
(
isinstance
(
ele
,
int
)
for
ele
in
ele
[
1
]):
raise
TypeError
(
"var dtype mismatch error: var name = %s, var type in var_list = %s, while var in data_generator contains non-int value, which is %s
\n
"
"Please check if order of var_list and data_generator are aligned.
\n
"
"Please check if var's type in data_generator is correct."
%
(
ele
[
0
],
"int"
,
ele
[
1
]))
else
:
break
f
.
close
()
class
InMemoryDataset
(
DatasetBase
):
"""
...
...
python/paddle/fluid/tests/unittests/test_dataset_consistency_inspection.py
0 → 100644
浏览文件 @
5f4af11a
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
TestCases for Dataset consistency insepection of use_var_list and data_generator.
"""
from
__future__
import
print_function
import
paddle
import
paddle.fluid
as
fluid
import
paddle.compat
as
cpt
import
paddle.fluid.core
as
core
import
numpy
as
np
import
random
import
math
import
os
import
shutil
import
unittest
import
paddle.fluid.incubate.data_generator
as
dg
#paddle.enable_static()
# fluid.disable_dygraph()
# The test builds `fluid.layers.data` variables, which requires static-graph
# mode, so dygraph is turned off at import time.
fluid.disable_dygraph()

# Number of ';'-separated fields one URL block occupies in a raw sample
# line: CTRDataset.reader() consumes url_schema_len - 1 sparse url-feature
# fields and then one click-statistics field per URL.
url_schema_len = 5

# Names of the 19 sparse query-side slots, in the order they appear in a
# raw sample line (starting at field index 2).
query_schema = [
    'Q_query_basic',
    'Q_query_phrase',
    'Q_quq',
    'Q_timelevel',
    'Q_context_title_basic1',
    'Q_context_title_basic2',
    'Q_context_title_basic3',
    'Q_context_title_basic4',
    'Q_context_title_basic5',
    'Q_context_title_phrase1',
    'Q_context_title_phrase2',
    'Q_context_title_phrase3',
    'Q_context_title_phrase4',
    'Q_context_title_phrase5',
    'Q_context_site1',
    'Q_context_site2',
    'Q_context_site3',
    'Q_context_site4',
    'Q_context_site5',
]
class CTRDataset(dg.MultiSlotDataGenerator):
    """
    Data generator used as a fixture for the consistency checker.

    ``mode`` selects the layout of the yielded samples. Mode 0 produces the
    well-formed 54-slot record (label + 19 query + 4 url + 11 click-stat +
    2 context slots for the positive URL, then the same 4+11+2 slots for
    the negative URL). Modes 1-5 produce variants — positive/negative block
    duplicated (1), an extra int slot after the label (2), an extra float
    slot after the label (3), an empty label slot (4), no label slot at
    all (5) — so the checker's error paths can be exercised.
    """

    def __init__(self, mode):
        # Stored mode drives the branch taken in reader() below.
        self.test = mode

    def generate_sample(self, line):
        """Return a reader() generator that parses one raw sample `line`."""

        def reader():
            ins = line.strip().split(';')
            # Field 1 holds "<num_positive_urls> <num_negative_urls>".
            label_pos_num = int(ins[1].split(' ')[0])
            label_neg_num = int(ins[1].split(' ')[1])
            #query fea parse
            bias = 2
            query_len = 0
            sparse_query_feature = []
            for index in range(len(query_schema)):
                pos = index + bias
                sparse_query_feature.append(
                    [int(x) for x in ins[pos].split(' ')])
                if index == 0:
                    # Token count of the basic query field, squashed below.
                    query_len = len(ins[pos].split(' '))
            # Sigmoid-like squash of the raw token count into (0, 1).
            query_len = 1.0 / (1 + pow(2.7182818, 3 - 1.0 * query_len))

            #positive url fea parse
            bias = 2 + len(query_schema)
            pos_url_feas = []
            pos_click_feas = []
            pos_context_feas = []
            for k in range(label_pos_num):
                pos_url_fea = []
                pos = 0
                # First url_schema_len - 1 fields of the block are sparse
                # url features.
                for index in range(url_schema_len - 1):
                    pos = bias + k * (url_schema_len) + index
                    pos_url_fea.append(
                        [int(x) for x in ins[pos].split(' ')])
                #click info
                if (ins[pos + 1] == ''):
                    continue
                item = ins[pos + 1].split(' ')
                if len(item) != 17:
                    continue
                # Keep 11 of the 17 click statistics (drop indices
                # 5, 9, 13-16), clamped at 0.0.
                stat_fea = [[max(float(item[i]), 0.0)]
                            for i in range(len(item))
                            if not (i == 5 or i == 9 or i == 13 or i == 14 or
                                    i == 15 or i == 16)]
                pos_url_feas.append(pos_url_fea)
                pos_click_feas.append(stat_fea)
                query_serach = float(item[5])
                if query_serach > 0.0:
                    query_serach = min(math.log(query_serach), 10.0) / 10.0
                pos_context_fea = [[query_serach], [query_len]]
                pos_context_feas.append(pos_context_fea)

            #negative url fea parse
            # Negative blocks start right after the positive ones.
            bias = 2 + len(query_schema) + label_pos_num * (url_schema_len)
            neg_url_feas = []
            neg_click_feas = []
            neg_context_feas = []
            for k in range(label_neg_num):
                neg_url_fea = []
                pos = 0
                for index in range(url_schema_len - 1):
                    pos = bias + k * (url_schema_len) + index
                    neg_url_fea.append(
                        [int(x) for x in ins[pos].split(' ')])
                if (ins[pos + 1] == ''):
                    continue
                item = ins[pos + 1].split(' ')
                #zdf_tmp
                if len(item) != 17:
                    continue
                #print ins[pos + 1]
                stat_fea = [[max(float(item[i]), 0.0)]
                            for i in range(len(item))
                            if not (i == 5 or i == 9 or i == 13 or i == 14 or
                                    i == 15 or i == 16)]
                # NOTE: append order here (click before url) mirrors the
                # original code and differs from the positive loop.
                neg_click_feas.append(stat_fea)
                neg_url_feas.append(neg_url_fea)
                query_serach = float(item[5])
                if query_serach > 0.0:
                    query_serach = min(math.log(query_serach), 10.0) / 10.0
                neg_context_fea = [[query_serach], [query_len]]
                neg_context_feas.append(neg_context_fea)

            #make train data
            if self.test == 1:
                # mode 1: each sample repeats its own url/click/context
                # block twice (positive samples labelled 1, negatives 0).
                for p in range(len(pos_url_feas)):
                    # feature_name = ["click"] + query_schema + url_schema[:4] + click_info_schema[:11] + context_schema[:2]
                    feature_name = ["click"]
                    for i in range(1, 54):
                        feature_name.append(str(i))
                    pos_url_fea = pos_url_feas[p]
                    pos_click_fea = pos_click_feas[p]
                    pos_context_fea = pos_context_feas[p]
                    yield zip(feature_name, [[1]] + sparse_query_feature +
                              pos_url_fea + pos_click_fea + pos_context_fea +
                              pos_url_fea + pos_click_fea + pos_context_fea)
                for n in range(len(neg_url_feas)):
                    feature_name = ["click"]
                    for i in range(1, 54):
                        feature_name.append(str(i))
                    neg_url_fea = neg_url_feas[n]
                    neg_click_fea = neg_click_feas[n]
                    neg_context_fea = neg_context_feas[n]
                    yield zip(feature_name, [[0]] + sparse_query_feature +
                              neg_url_fea + neg_click_fea + neg_context_fea +
                              neg_url_fea + neg_click_fea + neg_context_fea)
            elif self.test == 0:
                # mode 0: the well-formed pairwise record — 54 slots for
                # every (positive, negative) combination.
                for p in range(len(pos_url_feas)):
                    #feature_name = ["click"] + query_schema + url_schema[:4] + click_info_schema[:11] + context_schema[:2] + url_schema[4:] + click_info_schema[11:] + context_schema[2:]
                    feature_name = ["click"]
                    for i in range(1, 54):
                        feature_name.append(str(i))
                    #print("#######")
                    #print(feature_name)
                    #print("#######")
                    pos_url_fea = pos_url_feas[p]
                    pos_click_fea = pos_click_feas[p]
                    pos_context_fea = pos_context_feas[p]
                    for n in range(len(neg_url_feas)):
                        # prob = get_rand()
                        # if prob < sample_rate:
                        neg_url_fea = neg_url_feas[n]
                        neg_click_fea = neg_click_feas[n]
                        neg_context_fea = neg_context_feas[n]
                        #print("q:", query_feas)
                        #print("pos:", pos_url_fea)
                        #print("neg:", neg_url_fea)
                        # yield zip(feature_name[:3], sparse_query_feature[:3])
                        yield list(
                            zip(feature_name, [[1]] + sparse_query_feature +
                                pos_url_fea + pos_click_fea + pos_context_fea +
                                neg_url_fea + neg_click_fea + neg_context_fea))
            elif self.test == 2:
                # mode 2: extra int slot [2] after the label — mis-shapes
                # the record on purpose.
                for p in range(len(pos_url_feas)):
                    #feature_name = ["click"] + query_schema + url_schema[:4] + click_info_schema[:11] + context_schema[:2] + url_schema[4:] + click_info_schema[11:] + context_schema[2:]
                    feature_name = ["click"]
                    for i in range(1, 54):
                        feature_name.append(str(i))
                    #print("#######")
                    #print(feature_name)
                    #print("#######")
                    pos_url_fea = pos_url_feas[p]
                    pos_click_fea = pos_click_feas[p]
                    pos_context_fea = pos_context_feas[p]
                    for n in range(len(neg_url_feas)):
                        # prob = get_rand()
                        # if prob < sample_rate:
                        neg_url_fea = neg_url_feas[n]
                        neg_click_fea = neg_click_feas[n]
                        neg_context_fea = neg_context_feas[n]
                        #print("q:", query_feas)
                        #print("pos:", pos_url_fea)
                        #print("neg:", neg_url_fea)
                        # yield zip(feature_name[:3], sparse_query_feature[:3])
                        yield list(
                            zip(feature_name, [[1], [2]] +
                                sparse_query_feature +
                                pos_url_fea + pos_click_fea + pos_context_fea +
                                neg_url_fea + neg_click_fea + neg_context_fea))
            elif self.test == 3:
                # mode 3: extra float slot [2.0] after the label.
                for p in range(len(pos_url_feas)):
                    #feature_name = ["click"] + query_schema + url_schema[:4] + click_info_schema[:11] + context_schema[:2] + url_schema[4:] + click_info_schema[11:] + context_schema[2:]
                    feature_name = ["click"]
                    for i in range(1, 54):
                        feature_name.append(str(i))
                    #print("#######")
                    #print(feature_name)
                    #print("#######")
                    pos_url_fea = pos_url_feas[p]
                    pos_click_fea = pos_click_feas[p]
                    pos_context_fea = pos_context_feas[p]
                    for n in range(len(neg_url_feas)):
                        # prob = get_rand()
                        # if prob < sample_rate:
                        neg_url_fea = neg_url_feas[n]
                        neg_click_fea = neg_click_feas[n]
                        neg_context_fea = neg_context_feas[n]
                        #print("q:", query_feas)
                        #print("pos:", pos_url_fea)
                        #print("neg:", neg_url_fea)
                        # yield zip(feature_name[:3], sparse_query_feature[:3])
                        yield list(
                            zip(feature_name, [[1], [2.0]] +
                                sparse_query_feature +
                                pos_url_fea + pos_click_fea + pos_context_fea +
                                neg_url_fea + neg_click_fea + neg_context_fea))
            elif self.test == 4:
                # mode 4: label slot is an empty list — triggers the
                # empty-slot check.
                for p in range(len(pos_url_feas)):
                    #feature_name = ["click"] + query_schema + url_schema[:4] + click_info_schema[:11] + context_schema[:2] + url_schema[4:] + click_info_schema[11:] + context_schema[2:]
                    feature_name = ["click"]
                    for i in range(1, 54):
                        feature_name.append(str(i))
                    #print("#######")
                    #print(feature_name)
                    #print("#######")
                    pos_url_fea = pos_url_feas[p]
                    pos_click_fea = pos_click_feas[p]
                    pos_context_fea = pos_context_feas[p]
                    for n in range(len(neg_url_feas)):
                        # prob = get_rand()
                        # if prob < sample_rate:
                        neg_url_fea = neg_url_feas[n]
                        neg_click_fea = neg_click_feas[n]
                        neg_context_fea = neg_context_feas[n]
                        #print("q:", query_feas)
                        #print("pos:", pos_url_fea)
                        #print("neg:", neg_url_fea)
                        # yield zip(feature_name[:3], sparse_query_feature[:3])
                        yield list(
                            zip(feature_name, [[], [2.0]] +
                                sparse_query_feature +
                                pos_url_fea + pos_click_fea + pos_context_fea +
                                neg_url_fea + neg_click_fea + neg_context_fea))
            elif self.test == 5:
                # mode 5: label slot omitted entirely (53 values vs 54
                # names).
                for p in range(len(pos_url_feas)):
                    #feature_name = ["click"] + query_schema + url_schema[:4] + click_info_schema[:11] + context_schema[:2] + url_schema[4:] + click_info_schema[11:] + context_schema[2:]
                    feature_name = ["click"]
                    for i in range(1, 54):
                        feature_name.append(str(i))
                    #print("#######")
                    #print(feature_name)
                    #print("#######")
                    pos_url_fea = pos_url_feas[p]
                    pos_click_fea = pos_click_feas[p]
                    pos_context_fea = pos_context_feas[p]
                    for n in range(len(neg_url_feas)):
                        # prob = get_rand()
                        # if prob < sample_rate:
                        neg_url_fea = neg_url_feas[n]
                        neg_click_fea = neg_click_feas[n]
                        neg_context_fea = neg_context_feas[n]
                        #print("q:", query_feas)
                        #print("pos:", pos_url_fea)
                        #print("neg:", neg_url_fea)
                        # yield zip(feature_name[:3], sparse_query_feature[:3])
                        yield list(
                            zip(feature_name, sparse_query_feature +
                                pos_url_fea + pos_click_fea + pos_context_fea +
                                neg_url_fea + neg_click_fea + neg_context_fea))

        return reader
class TestDataset(unittest.TestCase):
    """ TestCases for Dataset. """

    def setUp(self):
        # No shared fixture: the test below creates and removes its own
        # input file.
        pass
        # use_data_loader = False
        # epoch_num = 10
        # drop_last = False

    def test_var_consistency_insepection(self):
        """
        Testcase for InMemoryDataset of consistency inspection of use_var_list and data_generator.
        """
        # Write two raw sample lines in the ';'-separated format consumed
        # by CTRDataset.generate_sample().
        with open("test_run_with_dump_a.txt", "w") as f:
            # data = "\n"
            # data += "\n"
            data = "2 1;1 9;20002001 20001240 20001860 20003611 20000723;20002001 20001240 20001860 20003611 20000723;0;40000001;20002001 20001240 20001860 20003611 20000157 20000723 20000070 20002616 20000157 20000005;20002001 20001240 20001860 20003611 20000157 20001776 20000070 20002616 20000157 20000005;20002001 20001240 20001860 20003611 20000723 20000070 20002001 20001240 20001860 20003611 20012788 20000157;20002001 20001240 20001860 20003611 20000623 20000251 20000157 20000723 20000070 20000001 20000057;20002640 20004695 20000157 20000723 20000070 20002001 20001240 20001860 20003611;20002001 20001240 20001860 20003611 20000157 20000723 20000070 20003519 20000005;20002001 20001240 20001860 20003611 20000157 20001776 20000070 20003519 20000005;20002001 20001240 20001860 20003611 20000723 20000070 20002001 20001240 20001860 20003611 20131464;20002001 20001240 20001860 20003611 20018820 20000157 20000723 20000070 20000001 20000057;20002640 20034154 20000723 20000070 20002001 20001240 20001860 20003611;10000200;10000200;10063938;10000008;10000177;20002001 20001240 20001860 20003611 20010833 20000210 20000500 20000401 20000251 20012198 20001023 20000157;20002001 20001240 20001860 20003611 20012396 20000500 20002513 20012198 20001023 20000157;10000123;30000004;0.623 0.233 0.290 0.208 0.354 49.000 0.000 0.000 0.000 -1.000 0.569 0.679 0.733 53 17 2 0;20002001 20001240 20001860 20003611 20000723;20002001 20001240 20001860 20003611 20000723;10000047;30000004;0.067 0.000 0.161 0.005 0.000 49.000 0.000 0.000 0.000 -1.000 0.000 0.378 0.043 0 6 0 0;20002001 20001240 20001860 20003611 20000157 20000723 20000070 20002616 20000157 20000005;20002001 20001240 20001860 20003611 20000157 20000723 20000070 20003519 20000005;10000200;30000001;0.407 0.111 0.196 0.095 0.181 49.000 0.000 0.000 0.000 -1.000 0.306 0.538 0.355 48 8 0 0;20002001 20001240 20001860 20003611 20000157 20001776 20000070 20002616 20000157 20000005;20002001 20001240 20001860 20003611 20000157 20001776 20000070 20003519 20000005;10000200;30000001;0.226 0.029 0.149 0.031 0.074 49.000 0.000 0.000 0.000 -1.000 0.220 0.531 0.286 26 6 0 0;20002001 20001240 20001860 20003611 20000723 20000070 20002001 20001240 20001860 20003611 20012788 20000157;20002001 20001240 20001860 20003611 20000723 20000070 20002001 20001240 20001860 20003611 20131464;10063938;30000001;0.250 0.019 0.138 0.012 0.027 49.000 0.000 0.000 0.000 -1.000 0.370 0.449 0.327 7 2 0 0;20002001 20001240 20001860 20003611 20000723;20002001 20001240 20001860 20003611 20000723;10000003;30000002;0.056 0.000 0.139 0.003 0.000 49.000 0.000 0.000 0.000 -1.000 0.000 0.346 0.059 15 3 0 0;20002001 20001240 20001860 20003611 20000623 20000251 20000157 20000723 20000070 20000001 20000057;20002001 20001240 20001860 20003611 20018820 20000157 20000723 20000070 20000001 20000057;10000008;30000001;0.166 0.004 0.127 0.001 0.004 49.000 0.000 0.000 0.000 -1.000 0.103 0.417 0.394 10 3 0 0;20002640 20004695 20000157 20000723 20000070 20002001 20001240 20001860 20003611;20002640 20034154 20000723 20000070 20002001 20001240 20001860 20003611;10000177;30000001;0.094 0.008 0.157 0.012 0.059 49.000 0.000 0.000 0.000 -1.000 0.051 0.382 0.142 21 0 0 0;20002001 20001240 20001860 20003611 20000157 20001776 20000070 20000157;20002001 20001240 20001860 20003611 20000157 20001776 20000070 20000157;10000134;30000001;0.220 0.016 0.181 0.037 0.098 49.000 0.000 0.000 0.000 -1.000 0.192 0.453 0.199 17 1 0 0;20002001 20001240 20001860 20003611 20002640 20004695 20000157 20000723 20000070 20002001 20001240 20001860 20003611;20002001 20001240 20001860 20003611 20002640 20034154 20000723 20000070 20002001 20001240 20001860 20003611;10000638;30000001;0.000 0.000 0.000 0.000 0.000 49.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0 0 0 0;\n"
            data += "2 1;1 11;20000025 20000404;20001923;20000002 20000157 20000028 20004205 20000500 20028809 20000571 20000007 20027523 20004940 20000651 20000043 20000051 20000520 20015398 20000066 20004720 20000070 20001648;40000001;20000025 20000404 20000571 20004940 20000001 20000017;20000025 20000404 20000029 20000500 20001408 20000404 20000001 20000017;0;0;0;20001923 20011130 20000027;20001923 20000029 20000500 20001408 20000404 20000027;0;0;0;10000005;10000005;0;0;0;20003316 20000392 20001979 20000474 20000025 20000194 20000025 20000404 20000019 20000109;20016528 20024913 20004748 20001923 20000019 20000109;10000015;30000002;0.572 0.043 0.401 0.352 0.562 32859.000 0.005 0.060 0.362 -1.000 0.448 0.673 0.222 16316 991 89 0;20000025 20000404 20000571 20004940 20000001 20000017;20001923 20011130 20000027;10000005;30000001;0.495 0.024 0.344 0.285 0.379 32859.000 0.002 0.050 0.362 -1.000 0.423 0.764 0.254 19929 896 72 0;20000202 20000026 20001314 20004289 20000025 20000404 20000451 20000089 20000007;20000202 20000026 20014094 20001314 20004289 20001923 20000451 20000089 20000007;10000035;30000003;0.133 0.006 0.162 0.042 0.174 32859.000 0.003 0.037 0.362 -1.000 0.363 0.542 0.122 14763 664 53 0;20000202 20000026 20001314 20004289 20000025 20000404;20000202 20000026 20014094 20001314 20004289 20001923;10000021;30000001;0.058 0.004 0.133 0.017 0.120 32859.000 0.000 0.006 0.362 -1.000 0.168 0.437 0.041 -1 -1 -1 -1;20000025 20000404 20000018 20012461 20001699 20000446 20000174 20000062 20000133 20003172 20000240 20007877 20067375 20000111 20000164 20001410 20000204 20016958;20001923 20000018 20012461 20001699 20007717 20000062 20000133 20003172 20000240 20007877 20067375 20000111 20000164 20001410 20000204 20016958;10000002;30000001;0.017 0.000 0.099 0.004 0.072 32859.000 0.000 0.009 0.362 -1.000 0.058 0.393 0.025 -1 -1 -1 -1;20000025 20000404;20001923;10000133;30000005;0.004 0.000 0.122 0.000 0.000 32859.000 0.000 0.000 0.362 -1.000 0.000 0.413 0.020 0 444 35 0;20000025 20000404;20001923;10005297;30000004;0.028 0.000 0.138 0.002 0.000 32859.000 0.000 0.000 0.362 -1.000 0.000 0.343 0.024 0 600 48 0;20000025 20000404;20001923;10000060;30000005;0.107 0.000 0.110 0.027 0.077 32859.000 0.000 0.005 0.362 -1.000 0.095 0.398 0.062 1338 491 39 0;20002960 20005534 20000043 20000025 20000404 20000025 20000007;20002960 20005534 20000043 20001923 20000025 20000007;10000020;30000003;0.041 0.000 0.122 0.012 0.101 32859.000 0.001 0.025 0.362 -1.000 0.302 0.541 0.065 9896 402 35 0;20000025 20000404 20000259 20000228 20000235 20000142;20001923 20000259 20000264 20000142;10000024;30000003;0.072 0.002 0.156 0.026 0.141 32859.000 0.002 0.032 0.362 -1.000 0.386 0.569 0.103 9896 364 35 0;20000025 20000404 20000029 20000500 20001408 20000404 20000001 20000017;20001923 20000029 20000500 20001408 20000404 20000027;10000005;30000001;0.328 0.006 0.179 0.125 0.181 32859.000 0.003 0.058 0.362 -1.000 0.300 0.445 0.141 9896 402 32 0;20000025 20000404;20001923;10012839;30000002;0.012 0.000 0.108 0.002 0.048 32859.000 0.000 0.000 0.362 -1.000 0.021 0.225 0.016 2207 120 12 0;\n"
            # data += ""
            f.write(data)

        # Build the use_var list: 54 variables matching CTRDataset mode 0.
        slot_data = []
        # Label slot.
        label = fluid.layers.data(
            name="click",
            shape=[-1, 1],
            dtype="int64",
            lod_level=0,
            append_batch_size=False)
        slot_data.append(label)
        # sprase_query_feat_names
        len_sparse_query = 19
        for feat_name in range(1, len_sparse_query + 1):
            slot_data.append(
                fluid.layers.data(
                    name=str(feat_name), shape=[1], dtype='int64',
                    lod_level=1))
        # sparse_url_feat_names
        for feat_name in range(len_sparse_query + 1, len_sparse_query + 5):
            slot_data.append(
                fluid.layers.data(
                    name=str(feat_name), shape=[1], dtype='int64',
                    lod_level=1))
        # dense_feat_names
        for feat_name in range(len_sparse_query + 5, len_sparse_query + 16):
            slot_data.append(
                fluid.layers.data(
                    name=str(feat_name), shape=[1], dtype='float32'))
        # context_feat_namess
        for feat_name in range(len_sparse_query + 16, len_sparse_query + 18):
            slot_data.append(
                fluid.layers.data(
                    name=str(feat_name), shape=[1], dtype='float32'))
        # neg sparse_url_feat_names
        for feat_name in range(len_sparse_query + 18, len_sparse_query + 22):
            slot_data.append(
                fluid.layers.data(
                    name=str(feat_name), shape=[1], dtype='int64',
                    lod_level=1))
        # neg dense_feat_names
        for feat_name in range(len_sparse_query + 22, len_sparse_query + 33):
            slot_data.append(
                fluid.layers.data(
                    name=str(feat_name), shape=[1], dtype='float32'))
        # neg context_feat_namess
        for feat_name in range(len_sparse_query + 33, len_sparse_query + 35):
            slot_data.append(
                fluid.layers.data(
                    name=str(feat_name), shape=[1], dtype='float32'))

        dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")

        # case 1: mode 0 is the well-formed layout — the check must pass.
        print("========================================")
        generator_class = CTRDataset(mode=0)
        try:
            dataset.check_use_var_with_data_generator(
                slot_data, generator_class, "test_run_with_dump_a.txt")
            print("case 1: check passed!")
        except Exception as e:
            print("warning: catch expected error")
            print(e)
        print("========================================")
        print("\n")

        # case 2: extra int slot — the checker is expected to raise.
        print("========================================")
        generator_class = CTRDataset(mode=2)
        try:
            dataset.check_use_var_with_data_generator(
                slot_data, generator_class, "test_run_with_dump_a.txt")
        except Exception as e:
            print("warning: case 2 catch expected error")
            print(e)
        print("========================================")
        print("\n")

        # case 3: extra float slot — expected to raise.
        print("========================================")
        generator_class = CTRDataset(mode=3)
        try:
            dataset.check_use_var_with_data_generator(
                slot_data, generator_class, "test_run_with_dump_a.txt")
        except Exception as e:
            print("warning: case 3 catch expected error")
            print(e)
        print("========================================")
        print("\n")

        # case 4: empty label slot — expected to raise.
        print("========================================")
        generator_class = CTRDataset(mode=4)
        try:
            dataset.check_use_var_with_data_generator(
                slot_data, generator_class, "test_run_with_dump_a.txt")
        except Exception as e:
            print("warning: case 4 catch expected error")
            print(e)
        print("========================================")
        print("\n")

        # case 5: missing label slot — expected to raise.
        print("========================================")
        generator_class = CTRDataset(mode=5)
        try:
            dataset.check_use_var_with_data_generator(
                slot_data, generator_class, "test_run_with_dump_a.txt")
        except Exception as e:
            print("warning: case 5 catch expected error")
            print(e)
        print("========================================")

        os.remove("./test_run_with_dump_a.txt")
if __name__ == '__main__':
    # Run the consistency-inspection test cases directly.
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录