Commit 522d4d4d authored by xujiaqi01

slot reader for rank

Parent 91dad673
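This commit moves the rank models from hand-written fluid.data feed lists to PaddleRec's slot reader: each model's config now declares its inputs as sparse_slots and dense_slots, and the trainer builds the input variables from those declarations. For orientation, a slot-format training line pairs each slot name with its value(s); the token syntax and values below are an illustrative assumption, not taken from this commit:

    label:0 I1:0.05 I2:0.006 ... I13:0.4 C1:737395 C2:817085 ... C26:93904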
@@ -54,7 +54,6 @@ class TranspileTrainer(Trainer):
        sparse_slots = envs.get_global_env("sparse_slots", None, namespace)
        dense_slots = envs.get_global_env("dense_slots", None, namespace)
        batch_size = envs.get_global_env("batch_size", None, namespace)
-       print("batch_size: {}".format(batch_size))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs


class TrainReader(Reader):
    def init(self):
        self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.cont_max_ = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
        self.cont_diff_ = [20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
        self.hash_dim_ = envs.get_global_env("hyper_parameters.sparse_feature_number", None, "train.model")
        self.continuous_range_ = range(1, 14)
        self.categorical_range_ = range(14, 40)
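        # Criteo TSV layout (implied by the ranges above): column 0 is the click
        # label, columns 1-13 the continuous features I1-I13, and columns 14-39
        # the categorical features C1-C26.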

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def reader():
            """
            This function needs to be implemented by the user, based on data format
            """
            features = line.rstrip('\n').split('\t')
            dense_feature = []
            sparse_feature = []
            for idx in self.continuous_range_:
                if features[idx] == "":
                    dense_feature.append(0.0)
                else:
                    dense_feature.append(
                        (float(features[idx]) - self.cont_min_[idx - 1]) /
                        self.cont_diff_[idx - 1])
            for idx in self.categorical_range_:
                sparse_feature.append(
                    [hash(str(idx) + features[idx]) % self.hash_dim_])
            label = [int(features[0])]
            feature_name = ["D"]
            for idx in self.categorical_range_:
                feature_name.append("S" + str(idx - 13))
            feature_name.append("label")
            yield zip(feature_name, [dense_feature] + sparse_feature + [label])

        return reader
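For orientation, a framework-free sketch of what generate_sample() yields for one raw line (the sample values and the hash_dim stand-in are invented, and normalization is skipped for brevity):

    # Standalone illustration of the slot tuples the reader above emits.
    hash_dim = 1000001                                      # stand-in for sparse_feature_number
    line = "0" + "\t1" * 13 + "\t68fd1e64" * 26             # made-up label + I1..I13 + C1..C26
    fields = line.rstrip('\n').split('\t')
    dense = [float(x) if x else 0.0 for x in fields[1:14]]  # I1..I13, un-normalized here
    sparse = [[hash(str(i) + fields[i]) % hash_dim] for i in range(14, 40)]
    names = ["D"] + ["S" + str(i) for i in range(1, 27)] + ["label"]
    print(list(zip(names, [dense] + sparse + [[int(fields[0])]])))
    # -> [('D', [1.0, ..., 1.0]), ('S1', [<id>]), ..., ('label', [0])]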
@@ -22,9 +22,10 @@ train:
  reader:
    batch_size: 2
    class: "{workspace}/criteo_reader.py"
-   train_data_path: "{workspace}/data/train"
+   train_data_path: "{workspace}/slot_data/train"
    feat_dict_name: "{workspace}/data/vocab"
+   sparse_slots: "label C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26"
+   dense_slots: "I1:1 I2:1 I3:1 I4:1 I5:1 I6:1 I7:1 I8:1 I9:1 I10:1 I11:1 I12:1 I13:1"
  model:
    models: "{workspace}/model.py"
@@ -34,7 +35,7 @@ train:
      l2_reg_cross: 0.00005
      dnn_use_bn: False
      clip_by_norm: 100.0
-     cat_feat_num: "{workspace}/data/cat_feature_num.txt"
+     cat_feat_num: "{workspace}/slot_data/cat_feature_num.txt"
      is_sparse: False
      is_test: False
      num_field: 39
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import math
import sys
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try:
    import cPickle as pickle
except ImportError:
    import pickle
from collections import Counter
import os


class TrainReader(Reader):
    def init(self):
        self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.cont_max_ = [
            5775, 257675, 65535, 969, 23159456, 431037, 56311, 6047, 29019, 11,
            231, 4008, 7393
        ]
        self.cont_diff_ = [
            self.cont_max_[i] - self.cont_min_[i]
            for i in range(len(self.cont_min_))
        ]
        self.cont_idx_ = list(range(1, 14))
        self.cat_idx_ = list(range(14, 40))

        dense_feat_names = ['I' + str(i) for i in range(1, 14)]
        sparse_feat_names = ['C' + str(i) for i in range(1, 27)]
        target = ['label']
        self.label_feat_names = target + dense_feat_names + sparse_feat_names

        self.cat_feat_idx_dict_list = [{} for _ in range(26)]
        # TODO: set vocabulary dictionary
        vocab_dir = envs.get_global_env("feat_dict_name", None, "train.reader")
        for i in range(26):
            lookup_idx = 1  # remain 0 for default value
            for line in open(
                    os.path.join(vocab_dir, 'C' + str(i + 1) + '.txt')):
                self.cat_feat_idx_dict_list[i][line.strip()] = lookup_idx
                lookup_idx += 1

    def _process_line(self, line):
        features = line.rstrip('\n').split('\t')
        label_feat_list = [[] for _ in range(40)]
        for idx in self.cont_idx_:
            if features[idx] == '':
                label_feat_list[idx].append(0)
            else:
                # 0-1 minmax norm
                # label_feat_list[idx].append((float(features[idx]) - self.cont_min_[idx - 1]) /
                #                             self.cont_diff_[idx - 1])
                # log transform
                label_feat_list[idx].append(
                    math.log(4 + float(features[idx]))
                    if idx == 2 else math.log(1 + float(features[idx])))
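                # Note: idx == 2 is feature I2, whose minimum is -3 in
                # cont_min_ above, so the +4 offset keeps the log argument
                # >= 1; the remaining continuous features are non-negative
                # and use the plain log(1 + x) form.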
        for idx in self.cat_idx_:
            if features[idx] == '' or features[
                    idx] not in self.cat_feat_idx_dict_list[idx - 14]:
                label_feat_list[idx].append(0)
            else:
                label_feat_list[idx].append(self.cat_feat_idx_dict_list[
                    idx - 14][features[idx]])
        label_feat_list[0].append(int(features[0]))
        return label_feat_list

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def data_iter():
            label_feat_list = self._process_line(line)
            yield list(zip(self.label_feat_names, label_feat_list))

        return data_iter
\ No newline at end of file
@@ -16,6 +16,11 @@ class Model(ModelBase):
        self.dnn_use_bn = envs.get_global_env("hyper_parameters.dnn_use_bn", None, self._namespace)
        self.clip_by_norm = envs.get_global_env("hyper_parameters.clip_by_norm", None, self._namespace)
        cat_feat_num = envs.get_global_env("hyper_parameters.cat_feat_num", None, self._namespace)
+       self.sparse_inputs = self._sparse_data_var[1:]
+       self.dense_inputs = self._dense_data_var
+       self.target_input = self._sparse_data_var[0]
        cat_feat_dims_dict = OrderedDict()
        for line in open(cat_feat_num):
            spls = line.strip().split()
@@ -25,8 +30,8 @@ class Model(ModelBase):
        )
        self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse", None, self._namespace)
-       self.dense_feat_names = ['I' + str(i) for i in range(1, 14)]
-       self.sparse_feat_names = ['C' + str(i) for i in range(1, 27)]
+       self.dense_feat_names = [i.name for i in self.dense_inputs]
+       self.sparse_feat_names = [i.name for i in self.sparse_inputs]
        # {feat_name: dims}
        self.feat_dims_dict = OrderedDict(
@@ -36,21 +41,17 @@ class Model(ModelBase):
        self.net_input = None
        self.loss = None

-   def _create_embedding_input(self, data_dict):
+   def _create_embedding_input(self):
        # sparse embedding
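        # Embedding width follows the common 6 * d**0.25 heuristic, where d is
        # the slot's vocabulary size; e.g. C3 with 1548 distinct values gets
        # 6 * int(1548 ** 0.25) = 6 * 6 = 36 dimensions.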
-       sparse_emb_dict = OrderedDict((name, fluid.embedding(
-           input=fluid.layers.cast(
-               data_dict[name], dtype='int64'),
-           size=[
-               self.feat_dims_dict[name] + 1,
-               6 * int(pow(self.feat_dims_dict[name], 0.25))
-           ],
-           is_sparse=self.is_sparse)) for name in self.sparse_feat_names)
+       sparse_emb_dict = OrderedDict()
+       for var in self.sparse_inputs:
+           sparse_emb_dict[var.name] = fluid.embedding(
+               input=var,
+               size=[
+                   self.feat_dims_dict[var.name] + 1,
+                   6 * int(pow(self.feat_dims_dict[var.name], 0.25))
+               ],
+               is_sparse=self.is_sparse)

        # combine dense and sparse_emb
-       dense_input_list = [
-           data_dict[name] for name in data_dict if name.startswith('I')
-       ]
+       dense_input_list = self.dense_inputs
        sparse_emb_list = list(sparse_emb_dict.values())

        sparse_input = fluid.layers.concat(sparse_emb_list, axis=-1)
@@ -97,14 +98,8 @@ class Model(ModelBase):
    def train_net(self):
        self.init_network()
-       self.target_input = fluid.data(
-           name='label', shape=[None, 1], dtype='float32')
-       data_dict = OrderedDict()
-       for feat_name in self.feat_dims_dict:
-           data_dict[feat_name] = fluid.data(
-               name=feat_name, shape=[None, 1], dtype='float32')
-       self.net_input = self._create_embedding_input(data_dict)
+       self.net_input = self._create_embedding_input()

        deep_out = self._deep_net(self.net_input, self.dnn_hidden_units, self.dnn_use_bn, False)
@@ -115,9 +110,6 @@ class Model(ModelBase):
        logit = fluid.layers.fc(last_out, 1)
        self.prob = fluid.layers.sigmoid(logit)
-       self._data_var = [self.target_input] + [
-           data_dict[dense_name] for dense_name in self.dense_feat_names
-       ] + [data_dict[sparse_name] for sparse_name in self.sparse_feat_names]

        # auc
        prob_2d = fluid.layers.concat([1 - self.prob, self.prob], 1)
@@ -127,9 +119,8 @@ class Model(ModelBase):
        self._metrics["AUC"] = auc_var
        self._metrics["BATCH_AUC"] = batch_auc_var

        # logloss
-       logloss = fluid.layers.log_loss(self.prob, self.target_input)
+       logloss = fluid.layers.log_loss(self.prob, fluid.layers.cast(self.target_input, dtype='float32'))
        self.avg_logloss = fluid.layers.reduce_mean(logloss)

        # reg_coeff * l2_reg_cross
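(The cast is needed because the label now arrives through the sparse slot as an integer tensor, while log_loss expects float32 inputs.)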
C1 139
C2 422
C3 1548
C4 1965
C5 54
C6 10
C7 3213
C8 81
C9 3
C10 2402
C11 2246
C12 1583
C13 1911
C14 24
C15 2011
C16 1731
C17 9
C18 1197
C19 584
C20 3
C21 1652
C22 8
C23 14
C24 1770
C25 40
C26 1349
(2 diffs collapsed.)
f434fac1
e6051457
7e5c2ff4
abca0bad
3b509222
340c148e
48f8c5b9
3c9d8785
585b6ccc
561bf9d4
b474c2c2
c1730738
92fb1d87
05db9164
c35dc981
ae82ea21
824be517
16a99cfb
e8ef605b
88abfaf6
7ceef477
17f69355
1464facd
f0a33555
80e4d755
3ec5d916
f5c9f18c
87552397
5ebc3192
426610d2
eb6dcae0
651f6a2d
7f9f4eb6
bd4b6d14
3560b08b
8068dc7e
9660b97b
9eb7531c
2d4ea12b
87773c45
5a9ed9b0
f473b8dc
b19f768d
70d60005
89889f05
c71ae391
c6dce90e
64e77ae7
0e78bd46
75ac2fe6
42a16b9a
19c5f803
cbffbdad
bfb430af
127f4a6b
6ca3af46
2b3bff44
8a033483
45cb84c9
554adfdb
46300ee3
a14cf13a
d0d66375
da4eff0f
4265881a
9684fd4d
7382c353
50d4de26
60c68845
e3493c7c
09ca0b81
3b65d647
98237733
fc9c62bb
41edac3d
dbfc8345
39af2607
581e410c
55845e1c
28e55712
6bcf7a5b
66651cdf
2b92c0d2
24eda356
dbe63c2b
9a89b36c
489d0f96
dac91c28
dc5ebbd9
1a5f926e
885aeecb
f1548e14
6062d843
c2a5852e
68fd1e64
be589b51
b455c6d7
cd3695ae
291b7ba2
2998a458
5e53cc38
dbe15b41
ff5f3ab9
49f631b8
3b1bc654
36a5b3ff
fbc55dae
467085ca
06584483
3f6e3c8b
3cc2325b
ff004ae3
eb6ac63c
0a16e1d4
34f74dfd
decf6fa6
18988050
c512b859
a86f8721
5bfa8ab5
8cf07265
dd14f377
287e684f
49c4b7c4
2ebc17d3
8c6ba407
fb174e6b
4615a3b6
394fc830
9e9d28f5
241546e0
4a4e85c4
26428e51
940683b1
65aada8c
ba454362
d4b08d58
49807078
439a44a4
(2 diffs collapsed.)
c1aef73d
2d81ed2c
a3bdcb7c
9248a1e8
a788321c
66b1e155
b5b8de53
fdab8598
4d9c9fa0
f1ce2ed4
fbbc41c2
c65541d1
a5de6c17
94f5aaee
c5d0e605
84d2c673
ee3501f0
4f3b0399
1606ff92
424ba327
192ffbec
640cd77c
9d0d5312
f9138878
fd461458
6499063c
5d65f22e
ed204454
dadde5ca
42c3797e
d4439b3f
7f508e0d
0aa6de84
6b9c3fee
7f658abc
b3c6e177
f88ba033
49e68fcc
140e9e27
d89e699e
826cb6b1
f1efc5f6
e22add65
13f2b8f1
06cf9db9
f38b9685
79b87c55
be9ad4e7
40d991f0
131e5de2
c198b273
d7c5e6ba
0a6fd594
90dfb495
5b97bb07
b96752b6
5b355b50
cf7e278f
84d46930
76ad996b
77fb35ab
feee3a16
957d6ee7
ea14b165
8c02ead9
f65b69a3
da27298a
cf724373
0d2a2c95
5662d3e8
b2fd56ef
d72a8b65
d8a69a76
61104d70
d674a6c9
4310190f
78a79932
3aac3e1b
35e0892f
7239dd00
9bf4fa6a
c995314a
e1756869
28b4e105
c385faef
31668efd
7eee8b4a
0273523d
061e59ac
5d00ecad
893f9e14
ce875433
087ef0e7
591e3c11
9aac7976
0734e0df
acb44480
a27f34ff
5989a764
9f32b866
3b90ab93
58a787a7
5595f556
cadcc5cd
9065c400
b7a2276b
827a0467
c71493ed
14d7c42b
3af94af0
867038d3
07f02922
3186644d
b866cd75
c534c129
863e573f
78d9706e
90c76b3c
ddd50acc
0af1d7f7
e145958a
b99ddbc8
f9de2371
d79b39c1
3df9cdd9
5ba57bfa
b9b911b1
8fbe0072
57c08194
c7883ba5
982104df
c111ef8c
28b79840
e678a74d
13bc7e19
5a594cea
adc24c45
cd2085cf
e2163351
2bd984a5
c5011072
e37f4bc1
a3e0d914
94ee7692
db60caf1
055ab34c
85a68f0f
db21b797
52b7a181
7ca611fd
ea734b1a
c06d6429
6532318c
d6b4fe71
b908eace
72f158e9
aa03db5c
7ea9a2f4
6dd83bc2
fa34f1cb
5bfa7585
7a9b3053
904e7ef6
d6c5488b
af0c1645
3af38a9c
6376dd6b
549524c2
e993816b
d0624fed
be29f7d4
f72523f1
877e0ebf
0942bb43
4e040fd9
df0f4ef2
cdd4f388
d6cbb5fb
5dac6850
08903b9d
a8ab776a
6119e0d4
d8399834
6c83c219
e539ae19
8933ce59
cbf608bb
ae9cdcd4
7a8f087d
bd13437e
90813faa
b4e60c78
e20558b3
183261ff
bafda429
af3d699c
317bfd7d
69f7e502
bb669e25
3b3e6ba1
e2c08cd9
6ebafa83
66372059
94a1cc80
dfbb09fb
f898f1b8
48bc5b52
98c0e953
6fd08f98
f3ad49a9
c66e830e
8a098a5d
a012b59f
b79346d7
7f78e6c7
68944c23
ba5aae2e
a705b156
a1364ca0
a0cf5647
a825c99b
2c4d9c4c
8125573a
1a69f1c0
e5b09bf6
c8c1e8a4
e2700d86
5172ee67
a389d767
2e61058e
6fb8f39a
021c82e5
6e76119f
ad972965
bef78a22
8a925a1b
4df84614
1da0c261
09200219
879f4082
f0be4a30
2397259a
b58812c1
33b83378
79d19519
1b0c8aa3
4d2af459
4cce3f75
0da1837b
662d25fe
3c7ba2bb
870771b1
673768e2
1a30ae06
362d4000
9d254525
b03f0955
1f6b8745
44f6116a
14d63538
49eb8265
03dce2a4
b1b76758
a8d07d00
2d2e6c55
cac79b4d
a0015d5d
87e248e6
cdd76771
aa910cce
e8c9d3fa
271f5122
5424fda7
5c935f2a
2598d8eb
78458b47
41d764b1
4afe6861
f25a8037
02472b09
803696a1
5c28d6c6
3a32a012
9c95a0df
79b98d3d
96e99a54
3c2d0e05
bdd0bd34
399cdaea
93de0c8e
76615e67
b7fb9997
7dcc3969
424d315c
7c09503b
58eb8589
2ad06856
d6e3ab87
c74f92a8
6eade2cc
e6deec50
e977ae2f
76951817
a90d7d9f
8b5867f2
5ea2e48b
6706ce51
604f499b
b9898409
b0f12191
982418bf
eebf94aa
ea50fcc9
672a3bf9
179a11e3
4d69e263
262ae33d
3743d561
0f80cdc7
7ddda62f
a3ca726d
504f5db4
84e5ac4e
f8ce562a
b67ac327
1d698bc2
1c77d5f1
800b8573
15d97f2a
18922c00
d000d519
b87a052d
d99fa921
05ec3803
7979221d
ba9a9658
2e1b4595
9c5a3598
6c7591c2
67cb474c
60e03064
fb8fab62
7b538f4b
b51dc799
5306e9ea
b787d76f
2eba67ae
a6bfd75c
77f29381
b12396c8
0706d4cc
7ea94441
b7f1d23a
449c20d0
ac612432
a0f41a51
bc1a3f28
2ed7ed80
39f1263d
69ae4278
5864de82
d365a3e3
f94df932
b992a469
bc69cf0b
1ee870da
cf7894f9
a85f443c
19fc0b79
88abab80
94368077
784088f0
cf30aac4
c28f349d
c8534259
6d065bbd
1b4ce856
eacd174d
1f54546e
f10c9cfd
5a92ccc8
17c0e328
17b9e35b
36407983
03afd96d
153ff04a
8229bc5b
ff35e49e
ca71e406
ecf3c050
05994a27
18407a62
d0275c5e
f0261606
560557e5
f9507afe
5cf1acf3
2e364a21
21a23bfe
3e302d42
863480c7
59fcba41
875b735e
1a1f723b
a6edc56f
15782fd0
553e02c3
184d6c51
e165d24e
5292f047
44776637
b661e386
5fc62500
04621bb1
58874c6c
434c4893
197dcb0e
67f771ae
a8fbe2f4
f2ea9889
7becd6e7
af908315
8ebd48c3
bd1662fd
e90a010f
c9cc4cf0
2a137d77
34e567a8
252162ec
ef0c2022
f7d97d7a
1ac36d08
2d878564
1bad82f2
9e2c7b68
2defaf33
af7fa246
bc0eb380
53becffa
2a93094b
9dedcb09
7161e106
a924e126
dc5b89d3
b293ab33
f89eb8f6
a1a65be6
74ba00cb
4a3850b0
ec8dca4c
13563125
e221fdc6
8a3b9b24
c504cddb
769e4c52
a0ca5294
bac9dcdb
ba8679a2
d54ff067
8882c6cd
5505dfb7
d326786e
2120ed69
2055fa1e
902ac8b1
375d8bbd
6647ec34
636dcdf5
251bc4a5
ef151f20
5b0180c6
9fa694f3
1205ef20
82ee7fd9
79cbc7f7
5e5ca0f9
c23bca28
6c28a86e
77602344
cd31013f
5594286f
5a4db0a2
b8f1a8df
ab22eb88
72e65cea
47d2b89c
467f06b7
7f91056a
76517c94
4d934f18
d3809c46
f23432d5
ee6c19b5
bd8165fc
e29f816a
34765ee9
8d33fe00
1dae3163
40edbbe8
8cc98d00
05c3b29a
894896ed
15ca9c06
e0a57f94
c8ace354
56bf7f9e
90edbc51
c665cd2b
c297ece7
422ad4cf
6ed1e4e2
0f8779c5
c12603f4
2daad6f3
2fe438ed
0ee90fc2
3ea9c523
c05ae48e
b191dfce
82bfc352
704ed80d
8a48eb95
f0f6a9c1
f217c8b4
8ace78d0
f3adf8be
6324e4bd
0105634a
0e3cae7e
062350df
9338777d
eb675c3b
fdc8950b
1d17ca13
6937c791
5c45a578
befd9d25
34b25eee
42778bea
bf2f0a8c
9548807f
7937deba
c79de9f7
10536ff9
3e72da54
43e3a426
63470841
68a6d325
893b7ae1
1c04d4f5
80becfff
9f457abd
865ad808
8a78a25e
3563ab62
ac5ccef6
50ed27ca
3c5900b5
96e389b3
cd749c9b
0dd7417d
d2278cf5
af5ce1ed
63d3fb31
709576ed
2d72ffd1
f4ec1778
69a5083d
34a2ebf1
8b103cf2
9ed3d0b9
2c1e69b6
ac6820fe
45bbb0ec
368c358b
4f230359
c703e271
0bc49df7
dc85594f
c0e6ed5c
257fcc4d
104fabc4
156f99ef
8f4497cb
ddc05636
19728a2f
45e9a7fa
a312e1e2
9ccb63a9
d9b23502
6663c4b0
179183e1
57676afe
9fd03f62
efa67886
06b297d9
41dd8d09
34645d5b
2dad7b23
961e6d6b
cc239583
d319cb43
b8ee36fc
0e26b386
c15e7f3e
a5571b9f
19ca8e3c
f9cd9fa8
e3c15540
e2c291a5
2a98cb06
7364e701
6a9dde60
9158b5a5
77348965
9ed8d6bc
39c0b4ea
8481d649
24d58844
5f27a931
b35573cd
28f7eeac
dcc9fc37
f0d5cc59
5aca94e6
792e199e
6169f967
1a4ffe88
fca38b4c
8b7c2178
c658f48e
6453e163
857a49b0
df8a4f07
47e8c514
01d4b4db
8c03a09b
ebf6ae0a
18b86fc6
f6e5b9ef
480bde65
8c5ece2a
1ce23264
2949c943
de898612
4bc80e2a
bca7e012
6536f6f8
d4fbf673
230267f3
aa03d2ee
d1fb0874
eb5ff98b
f4afbdcb
e31997ae
7d8db404
71345146
3565df81
ef6f097c
e9715419
adb4a533
8ae5e221
bb93f61b
1ca7a526
fa1eee27
590dfbb8
4fd67e13
c886a342
680d7261
855210f5
22cad86a
0f89aec9
fe9d0c5b
9aff23d7
8546f1ef
0a1485c1
4d360c97
d75edbf5
041d9426
9be71e89
f8ad4a41
a5f9a198
68a5e351
95812a33
4a67f833
a93a2643
0869abd6
253c11a9
fc75a704
d754a848
165dc2ce
82c3b58f
0da36732
f2d5da00
eff214f1
cb635ae2
67b31aac
641593be
33f5356f
89f7067a
38588c6b
e97cae00
45b12a68
3bef6c32
56e1a6c9
d5d86dcf
0a665a51
29dfcef7
80441957
dfc096e9
a3a9a46f
8396e6c0
50a702fb
d9714c1b
bf1f4a02
9a6d5824
a2a80116
2211576e
b94e4985
536a6954
bbe97cb9
7d7c6076
fa8e4000
ba2755e8
3493a15d
d9372f55
42bee2f2
a1cca232
bfd22f6f
719229e3
ff80ecd5
9b37f3c5
21161865
af97a54d
a6f57f5c
2098d925
fb885aa5
edd5a0ae
8e15c24e
c12fdf81
a719b8eb
38fe7ce8
bf474c97
b770657b
41b57a7c
304cec1b
6cae5a9d
1a5d089b
9f32d017
3bcac28d
e1f3056f
eb83af8a
2fd26f96
4eb9c398
a779839d
342476eb
a0a1b9a9
389eb0dc
5c55d0e0
4b715f92
e09a8161
92a663ff
16c8ad7b
da7b77d1
b411cbc7
f47c2c6a
7441161f
eeb07caf
1d27e688
1fa058ba
d60edc65
935327e3
c6e66003
bf2e5f33
6f2d0b37
4248353d
904fba4d
dde73338
3e86dc36
90db28fc
bee3806d
f42f28fd
5808459b
6aaba33c
1ea443ff
e2848a9a
72ca6191
8c92f967
fd352d95
32b1d348
276f3fdd
e5c1db3f
1e5bcb5d
f2bf9229
b7c15dd5
23977c44
cfc86806
5a1f6612
3ae289b3
71c32035
cead3a62
0a48382f
a3c23ad1
62ed9a91
ea83607f
942e2302
129cc160
2df201af
f5d84254
2f6a7c52
d8b234f6
4f4316df
c7a69a3b
7a27d4e1
cfe25cb7
83202629
34289160
21895686
24da5932
24d54eae
a0202926
81f69a8f
9132d455
424e28fe
3beaf220
2a6bd999
11fcf7fa
75587697
703226ec
bae6c746
8827de87
5b119c47
fec0c6ef
94235c77
20622e06
254ca0d7
e4e111a1
ed397d6b
5c4009e6
d083c277
23bc90a1
4efceca3
50165667
43c46d83
e4734e87
450f33a6
12711fa0
61ab0c4f
18b125c6
0826f297
76c72828
1731f3db
0e43ec3b
2630b570
977d7916
d7028959
ed9ffce7
ebb533c7
8cdc4941
4b625655
0ff31973
30111d93
11f44afd
f8b55668
1d6bfcff
e0cc99f3
ed5b84d4
eafd7f0c
4c31606a
fdb247ed
516c3bad
2a6d37f8
174d825f
69d2de5e
176858ad
2014c1b3
5332e3fb
10e9872f
afafa62a
5c84927d
9f2dff26
995e4a74
e0b0d930
3b917db0
54cb84ab
2f671908
0301901e
8249520b
6d4472d8
2c5e6524
7127a7d6
18e40a04
92d569ba
a0c32c81
27987cb9
376a4f52
95b585f9
8527be14
82665b78
07bdbe6b
2c7c4bba
25ca9201
7a97a313
aaef34d6
951fe4a9
0e528718
22608499
b015ed6c
22142f68
e657c595
e9562baf
d19c068c
f68514e9
539c5644
4757d03e
d69079e6
8a4d8e46
8066b103
ac639f12
b0fc60da
a63d1ffa
07f030ae
514fccea
39fff66b
4f25198d
99c1c8d3
574a31af
848b4d84
75c79158
167ba71f
71d55d49
d3f30591
55ffe9ca
255699b9
234f825d
18b9928d
64c7b338
0f4f2db4
8065cc64
76a79c33
f39ad7ae
b5d8545c
e9521d94
c9272d17
a5b4b78b
1c9ce10b
260ef2b6
ff838771
a6554629
4f879f5d
49fee879
8a11f111
aadcb74e
15b28eec
fe18e0cf
0096db77
ff8c6fd9
f6fe1d50
eb2a7aad
0f660539
8e838324
838b6876
4750aa00
6340457b
5ca5ea59
43183389
d010a692
1d29cbf8
97ad9659
7023b7d2
78022e82
e7b4ee44
a7d3279d
5f6ade89
0d261508
548118bd
88a8f5f6
23ae741f
44a0cf7f
940b6e42
ce197608
b4928074
98eb92dc
dd8781ec
26ffc51b
74906f90
676b1292
25644e7d
4ea4e9d5
83d7d5c2
834b5edc
32e1a215
06bd1916
a648ca66
f5d19c1c
aa787f00
5e78a000
28283f53
6953b65a
dbc14f56
154f316b
11d928f6
4d144b7d
34a77493
7c74405f
02d20151
52e91b9e
2c5269ac
a3391ca5
f6d35a1e
30e909b6
3ad1f48d
2eaa6a44
611d855e
e7378c05
cbc58cbc
0705b078
c889eb07
e333d643
511e6df7
5deda06e
72df8b8b
5091db5f
4866f28f
af487b63
b70bfe13
77145fc1
ada7568a
7452a48c
7b9e9937
d8bf293a
266666ac
13859eb7
7307c19e
cc99b33c
b938df15
f7cbe917
a6ae1271
7414723a
c47972c1
8d6a9f16
ce7ec713
9e7aafc2
397675e9
d712779e
afdf9a65
fd685f34
623049e6
5f4d1c67
fe2ae07d
3f66c36e
e25bdedd
fa6b2a51
b2526fea
54b7a508
98d78b2b
e2e599a7
479cc2e1
16191617
f2c35aba
9a103204
0bcd01e8
59f3245b
dc31f3dc
c228f276
8c5ebb31
3b9ae062
688190f3
ce2957ad
fde18531
b3e118e3
fcaae253
870620e6
ee4cd37c
7af65151
15b7ec34
ca193645
d7f5cb55
0e431092
4c6ddb1a
0afc6bf4
23121637
7eaa8ae0
8287dc29
7b570c07
87e6874d
c5825e99
a1dbab30
381dd9fd
476973dd
6ee4e8a3
7ec456cd
ec90808e
80fff3f2
f3a55aa0
4d72208f
1f27c37d
12a4b15a
6f92f1c3
55f78a0c
9eae5dcd
f3be137d
3b7201ca
e4c074db
374b2880
2a411f2a
6074c21f
4c000893
e0d76380
8eb3f772
c198896f
f96cc0b9
21a0ef88
7b79c094
d28c687a
c697eb6d
299582d9
c35a10e3
f36c0ae8
a8700c60
a53ad6d6
2e4c88c2
b853b799
14776f23
2479f13d
6a767367
23f7905b
4f47c10e
661c7493
2bf06664
62f43136
eec0e5f1
619f887c
7e01f09a
f919769c
d8c29807
20ea8abf
fc95c453
c1054c52
8e1de7db
183aee3b
a2722ce4
75ed459e
17ebf5d3
b9736368
4802d2e5
bf5cf2ca
4132f6f1
cf681365
eb865f73
ea803b7e
7aaa871c
ce298f58
e8934d81
87408e45
23253c33
171c9373
4dd1cabd
343da6e2
513555c7
2a064dba
f5d83e6f
cd9f20fa
ea260e89
1edfa625
8b82c64b
1d00cbc4
d2a980cd
80c43ab4
d73d5e92
c2bc8f73
98922d75
d898dd7f
3ee41b39
93bab460
39852814
241692b0
4f31b8af
4f7b022c
7e1106ce
7a09282f
496d0124
b8b41318
34a238e0
d07f4262
9e7f897d
7078d5bd
97bcec27
f91fd7b0
0a3254f1
422e8212
58d90787
5a962bf4
18fe96f5
33ad9b8a
993f2992
9b665b9c
ed5cfa27
91bc2f51
19199681
7e6c78cd
b438fc5b
325a69ed
e58930d5
eee3943b
4f1124f1
69d0b693
1fd56e82
336bb1d7
66a76a26
a9580b00
7b8c2a24
8376bb1e
8ff467ea
a70fb4d6
985464e6
cad8f137
349450bc
981eb85d
dcfd94f4
93c5771e
50ec33a6
4c8de1d7
e2cc7c06
3953854a
c78e8461
e23a52b4
1a3c8178
b456a550
d9455394
c07fefdc
bb0b487b
6fbed051
ae31f81d
05ccc530
9da25024
716265a1
961d324d
2edf6ee5
80111036
21551ee3
43fa7203
25b02fad
795646b5
50b7166d
2a4ef823
ec985270
7951b860
91f646f3
1cbc1420
cb0b50a9
4fb6247b
a9d84aa8
67995a64
d9a2039a
e1b40567
ac0e3f62
de1e5036
d818f210
c6cd927c
c857bb2b
c5244b96
47a0e9ff
e23891db
ebfb225c
6c6c6fff
3d40540f
ca4409a4
2828a546
276c03a1
8fa23f87
8e9d3e55
659daacd
d0c76b17
548cd6f7
9a841f69
bf413137
1d36bbc0
64e10296
657e3250
bd7db808
8e5c8813
6a3d6e52
58418528
fd6e6bcb
87c5daf8
df423e6d
0956dfee
488af1d3
27d47540
cdac3d6f
659ed597
5debd38d
e4df8c0b
7ac21686
fb9aaa0b
df78683f
2b0a63d7
38562f67
7e2167d5
92969770
0648c174
5b24eb53
be170224
a4d0248c
176cf3e5
729ecc4c
5f15d820
b519c595
8cd04d30
06a5d61b
720361aa
ae1bb660
ede54a1f
1d4d1a02
a48e62ea
46591921
96e4de4d
a4425bd8
f34e8f6a
8747d4c8
6741a372
46e4b83f
9f8dbf10
2215dddc
f3b2e496
4fc9d001
adf02f65
a7217873
79560450
ca9f3db8
f1760b27
bd7eec69
e3ea3d05
ac7833d4
c64bd5c0
875e8b1c
47fa5313
3d19618a
92f519ec
2d15871c
9699b949
218cd107
ed357f1d
936a332f
e2a965be
ba233d4f
d19e10db
dd727b99
3bec5d45
414d3dc9
bdd6affd
50391d84
38176faa
06ec1805
a9842147
e0b2c12a
e1a883d2
d9fc673a
37c28e63
defeb71b
d24c389e
18b3794f
07cecd0e
be45b877
732c8db2
a9ecf335
3bc47171
6803e296
d6f4cc32
5a276398
06fa8096
a0d2c974
ab5549ec
eb8ded57
c1dfa649
34db1f06
24c21652
ee11f1eb
1938e765
2a5bad22
66da8e94
83a0714b
63b34abd
43de72f6
d26682f5
e29f2fcf
06862b4a
e32bae69
33bea160
8a8a06bb
a325f22b
b9bee1c2
ad1492b0
534b9be3
f87e889e
f7e43e31
a2f4e8b5
ca0011c3
277cb5a2
b9f28c33
931a220d
3f9cbb6d
4d9cca5a
bf28da20
bde06ba1
14a72480
9cc98ead
160e1be5
d17f38c6
507ba343
2f06501b
7fc6984c
6f115481
e5e1ca92
0917c88e
74563ec1
9efba788
fa6037b9
28c51437
4454ea3a
f30da081
9378c2f0
2a73af0a
8ea37200
4919711a
7c5bf905
c2a85cd2
a2418cb1
e3af97ba
e7edcdba
22b8ec76
f1313990
ad365bb8
8529d3b4
1e087995
69873012
2436ff75
53bfa6f5
c47979b9
49507531
48f165b8
3dcc2635
a08bb4d7
6328e826
f161c883
f1ffcff0
31dad9c2
21084397
4cacc5df
dea90c39
8a582d5b
92ac7e35
4c6ad4f5
6bca71b1
ef8ef09d
3263408b
fe7ca040
28156fd4
3de41f4f
aa6b7700
c30bbcd1
22d6a7d8
5565c8ef
1bdd5433
f5b1b647
5f1d379c
90d80d1a
c3cdaf85
fb991bf5
70a58a12
e8a073ad
75c17e23
28540af6
8c30321c
520c7935
ea374c10
7d9b89ef
8b8cacf5
77120ffb
71fcc39d
8fe001f4
18979573
3512d101
9712b4e7
ace843df
610be298
47a3aa35
1c940221
4e67d4fa
3f8aec46
5c942915
048d525a
ffcecf14
cae2b00f
d08dd3b6
44f0eaa2
612905e5
1b2022a0
a5ee56ac
3096136c
3da52313
cb7fa42d
ed5c5acb
654bb16a
bb8c28a0
7609a1e0
15ce4793
b4339699
7ce3c66f
d8988024
63e498fc
394b6320
75529ad8
aef7b37a
d89a0026
ca4a4da9
24e81557
e8107cf4
83be0de2
d4dbce44
f9d5effa
f3dbd9b0
f24b551c
e599f97e
0cdb9a18
3020b608
d05bb1f7
7d5a4791
0ac9dec6
97cd01be
10ae0788
55b4ed2d
a7069b57
c3962347
e7750f14
526f00c4
a7ee0769
73c7614d
c260f168
138527f4
867f94c0
f56cc84a
f3d25ecf
b1121caf
b77b9c57
04047220
c9669737
dc906891
3f90e6f7
32528850
91f87a19
3a09994f
9441b6e1
291a0d2a
c60752a9
1241e747
136babe3
9556ca08
5f27bc59
3d6857f7
d267d56c
78a16776
11c577b0
ae70dc88
3fe585ab
3cc6c8de
17bcc684
7aeafd62
416d6970
05e13793
8d42130e
9311a066
b79a0855
1bb74bfa
7f0bbac3
fd2387f8
612652a9
a165a27c
be0e156a
b3a93317
02c27dec
8bb91717
(Diff collapsed.)
26ac7cf4
cf1fc48d
ad1cc976
e8dce07a
5aebfb83
b28479f6
64c94865
1adce6ef
32813e21
dcd762ee
051219e6
243a4e68
687dfaf4
f862f261
ab7390e9
f7c1b33f
d2dfe871
91233270
ec19f520
8ceecbc8
07d13a8f
0601d3b5
cfef1c29
0bc7c8c2
(2 diffs collapsed.)
07c540c4
776ce399
8efede7f
3486227d
e5ba7672
1e88c74f
2005abd1
27c07bd6
d4bb7bd8
(4 diffs collapsed.)
ad3062eb
49e825c5
ccfd4002
c9d4222a
78e2e389
8ec974f4
8651fddb
c0061c6d
93bad2c0
c3dc6cef
423fab69
25e3c76b
b264a060
85d5a995
be7c41b4
3a171ecb
c7dc6720
32c7478e
72592995
dbb486d7
bcdee96c
55dd3565
(Diff collapsed.)
e8b83407
7a402766
fd2fe0bd
ea9a246c
445bbe3b
8b8de563
9d93af03
c0812fc5
e13f3bf1
e0f2931a
07ee399f
5c813496
001f3601
59e2d823
9721386e
875ea8a7
82d3ae39
24657b11
2bf691b1
8f8c5acd
c243e98b
3a6f6b59
51c3d1d4
9b3e8820
ce62e669
f55c04b6
b9266ff0
47907db5
010f6491
f7839e21
46fbac64
3d2bedd7
724b04da
1575c75f
33d94071
f5b6afe5
c9f3bea7
f0f449dd
60c2b362
cb079c2d
(3 diffs collapsed.)
08da92ce
38ee9c99
b0530c50
8c837181
492fbc44
b4bae0ed
bf9f7f48
2319bb7d
06afddf0
b706ee81
3ca0f876
2c6b8ded
28bce97a
afcf7897
4f8b7acc
26eb6185
e856df70
ffab0078
4ea20c7d
47f98056
f3474129
0942e0a7
db844843
1524de30
b2241560
b974d47d
503ebb08
5a3e1872
db679829
384874ce
f1d40cbe
46dadd18
a9411994
d9131ab2
dd2d8e4d
4cf72387
d3ae3ce1
3597f508
f7109724
30903e74
65be028e
27bf8a17
3bb20e22
f281d2a7
d5b7606b
cadb6f23
a93acb09
229df405
307e775a
43b19349
89ff5705
a444653d
25c83c98
42d2cbf8
7e0ccccf
f1f2de2d
c05778d5
fe6b92e5
c76aecf6
6f6d9be8
13718bbd
e3520422
3bf701e7
fbad5c96
(Diff collapsed.)
f504a6f4
0b153874
7b6fecd5
235697d5
64523cfa
316074ea
0017bc7c
afc9ca6a
a6d156f4
13037314
d7c4a8f5
e350fe4f
931c3bf7
361384ce
6a698541
43383eb4
73b7901e
cb66451f
f0e5818a
0fb392dd
4671807e
da1ed842
51d76abe
5b392875
e602701d
f0298c3c
a25968f2
67b76963
cbe011a6
1296137f
d3334ebc
f1aa21a3
23eefdc2
322e63df
a674580f
d1b66f7a
a8d6f709
66c1ef42
813607cc
966033bc
e7945bc1
77b89023
73ba467e
169d7cc1
8bedcc53
9d01afb9
bb170c38
233428af
07a0b7e5
560f93f8
9c376700
d1aaef6c
71f9a260
449116d5
c5e75280
ba7cbdc6
25611aba
56563555
cb69809d
25239412
093a9651
a61cc0ef
062b5529
c7453af1
b621aeb8
45f7c2dd
49dd1874
8ee20c61
62c159ed
c8ddd494
1f89b562
0b5a4776
37e4aa92
985e3fcb
efaa8b67
66f29b89
6c41e35e
5b9f3341
271b0642
a04b3fa3
e6210023
@@ -22,9 +22,10 @@ train:
  reader:
    batch_size: 2
    class: "{workspace}/criteo_reader.py"
-   train_data_path: "{workspace}/data/train_data"
-   feat_dict_name: "{workspace}/data/aid_data/feat_dict_10.pkl2"
+   train_data_path: "{workspace}/slot_data/train_data"
+   feat_dict_name: "{workspace}/slot_data/feat_dict_10.pkl2"
+   sparse_slots: "label feat_idx"
+   dense_slots: "feat_value:39"
  model:
    models: "{workspace}/model.py"
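Under this config, one sample would carry a label slot, 39 feat_idx ids (13 continuous + 26 categorical fields), and a 39-wide feat_value dense slot; a rough sketch, assuming the usual slot_name:value token format (values invented):

    label:0 feat_idx:1 feat_idx:14 ... feat_value:0.0008 feat_value:1.0 ...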
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try:
    import cPickle as pickle
except ImportError:
    import pickle


class TrainReader(Reader):
    def init(self):
        self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.cont_max_ = [
            5775, 257675, 65535, 969, 23159456, 431037, 56311, 6047, 29019, 46,
            231, 4008, 7393
        ]
        self.cont_diff_ = [
            self.cont_max_[i] - self.cont_min_[i]
            for i in range(len(self.cont_min_))
        ]
        self.continuous_range_ = range(1, 14)
        self.categorical_range_ = range(14, 40)
        # load preprocessed feature dict
        self.feat_dict_name = envs.get_global_env("feat_dict_name", None, "train.reader")
        self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb'))

    def _process_line(self, line):
        features = line.rstrip('\n').split('\t')
        feat_idx = []
        feat_value = []
        for idx in self.continuous_range_:
            if features[idx] == '':
                feat_idx.append(0)
                feat_value.append(0.0)
            else:
                feat_idx.append(self.feat_dict_[idx])
                feat_value.append(
                    (float(features[idx]) - self.cont_min_[idx - 1]) /
                    self.cont_diff_[idx - 1])
        for idx in self.categorical_range_:
            if features[idx] == '' or features[idx] not in self.feat_dict_:
                feat_idx.append(0)
                feat_value.append(0.0)
            else:
                feat_idx.append(self.feat_dict_[features[idx]])
                feat_value.append(1.0)
        label = [int(features[0])]
        return feat_idx, feat_value, label

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def data_iter():
            feat_idx, feat_value, label = self._process_line(line)
            yield [('feat_idx', feat_idx), ('feat_value', feat_value),
                   ('label', label)]

        return data_iter
\ No newline at end of file
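A toy, framework-free illustration of the feat_idx/feat_value encoding above (the dictionary entries and raw values are invented): each continuous column keeps a fixed dict index with a min-max normalized value, while a categorical token maps to its dict index with an indicator value of 1.0.

    feat_dict = {1: 1, '68fd1e64': 14}                     # hypothetical feat_dict_ entries
    cont_min, cont_diff = 0, 5775                          # I1 stats from the lists above
    raw_I1, raw_C14 = '5', '68fd1e64'
    feat_idx = [feat_dict[1], feat_dict[raw_C14]]          # -> [1, 14]
    feat_value = [(float(raw_I1) - cont_min) / cont_diff,  # normalized continuous value
                  1.0]                                     # categorical indicator
    print(feat_idx, feat_value)                            # [1, 14] [0.000865..., 1.0]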
@@ -18,20 +18,13 @@ class Model(ModelBase):
        # ------------------------- network input --------------------------
        num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace)
-       raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field], dtype='int64')  # None * num_field (default: 39)
-       raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32')  # None * num_field
-       self.label = fluid.data(name='label', shape=[None, 1], dtype='float32')  # None * 1
-       feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1])  # (None * num_field) * 1
-       feat_value = fluid.layers.reshape(raw_feat_value, [-1, num_field, 1])  # None * num_field * 1
-       # ------------------------- set _data_var --------------------------
+       raw_feat_idx = self._sparse_data_var[1]
+       raw_feat_value = self._dense_data_var[0]
+       self.label = self._sparse_data_var[0]
-       self._data_var.append(raw_feat_idx)
-       self._data_var.append(raw_feat_value)
-       self._data_var.append(self.label)
-       if self._platform != "LINUX":
-           self._data_loader = fluid.io.DataLoader.from_generator(
-               feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
+       feat_idx = raw_feat_idx
+       feat_value = fluid.layers.reshape(raw_feat_value, [-1, num_field, 1])  # None * num_field * 1
#------------------------- first order term --------------------------
@@ -123,7 +116,7 @@ class Model(ModelBase):
        # ------------------------- Cost(logloss) --------------------------

-       cost = fluid.layers.log_loss(input=self.predict, label=self.label)
+       cost = fluid.layers.log_loss(input=self.predict, label=fluid.layers.cast(self.label, "float32"))
        avg_cost = fluid.layers.reduce_sum(cost)
        self._cost = avg_cost
(Diff collapsed.)
@@ -22,9 +22,10 @@ train:
  reader:
    batch_size: 2
    class: "{workspace}/../criteo_reader.py"
-   train_data_path: "{workspace}/data/train"
+   train_data_path: "{workspace}/slot_data/train"
    reader_debug_mode: False
+   sparse_slots: "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
+   dense_slots: "dense_var:13"
  model:
    models: "{workspace}/model.py"
(3 diffs collapsed.)
@@ -22,8 +22,9 @@ train:
  reader:
    batch_size: 2
    class: "{workspace}/reader.py"
-   train_data_path: "{workspace}/data/train_data"
+   train_data_path: "{workspace}/slot_data/train_data"
+   sparse_slots: "label"
+   dense_slots: "wide_input:8 deep_input:58"
  model:
    models: "{workspace}/model.py"
(9 diffs collapsed.)