提交 a58b30c6 编写于 作者: Z zhoushiyu 提交者: Thunderbrook

[PaddleRec] Fix NaN loss and hash problem in DNN model (#4067)

* fix NaN loss in the PaddleRec DNN model

* replace hash with mmh3.hash in the DNN model
上级 d1c74b39
...@@ -201,13 +201,13 @@ def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True): ...@@ -201,13 +201,13 @@ def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True):
initializer=fluid.initializer.Normal( initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1])))) scale=1 / math.sqrt(fc2.shape[1]))))
predict = fluid.layers.fc(input=fc3, predict = fluid.layers.fc(input=fc3,
size=2, size=1,
act='softmax', act='sigmoid',
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal( initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1])))) scale=1 / math.sqrt(fc3.shape[1]))))
cost = fluid.layers.cross_entropy(input=predict, label=words[-1]) cost = fluid.layers.log_loss(input=predict, label=fluid.layers.cast(words[-1], dtype="float32"))
avg_cost = fluid.layers.reduce_sum(cost) avg_cost = fluid.layers.reduce_sum(cost)
accuracy = fluid.layers.accuracy(input=predict, label=words[-1]) accuracy = fluid.layers.accuracy(input=predict, label=words[-1])
auc_var, batch_auc_var, auc_states = \ auc_var, batch_auc_var, auc_states = \
......
import mmh3
class Dataset: class Dataset:
def __init__(self): def __init__(self):
pass pass
...@@ -43,7 +46,8 @@ class CriteoDataset(Dataset): ...@@ -43,7 +46,8 @@ class CriteoDataset(Dataset):
self.cont_diff_[idx - 1]) self.cont_diff_[idx - 1])
for idx in self.categorical_range_: for idx in self.categorical_range_:
sparse_feature.append([ sparse_feature.append([
hash(str(idx) + features[idx]) % self.hash_dim_ mmh3.hash(str(idx) + features[idx]) %
self.hash_dim_
]) ])
label = [int(features[0])] label = [int(features[0])]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册