提交 5e574d41(未验证) 作者: zhang wenhui 提交者: GitHub

update 2.0 api (#4921)

* update api 1.8

* fix paddlerec readme

* fix readme

* update 2.0 api

* fix, test=develop
上级 9bf27322
# DeepFM动态图
以下是本例的简要目录结构及说明:
......@@ -65,9 +64,9 @@ CUDA_VISIBLE_DEVICES=0 python infer.py --checkpoint=models/epoch_0
## 效果
```text
test auc of epoch 0 is 0.802877
test auc of epoch 0 is 0.78+
```
第一轮数据训练结束后,test auc为0.802877
第一轮数据训练结束后,test auc为0.78+
继续训练容易出现过拟合现象,可以对各个模型进行评估,选择效果最好的模型作为最终训练结果。
......@@ -3,8 +3,7 @@ from __future__ import print_function
import os
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
import paddle
import logging
import time
......@@ -19,72 +18,74 @@ logger = logging.getLogger(__name__)
def infer(args):
if args.use_gpu:
place = fluid.CUDAPlace(0)
place = paddle.CUDAPlace(0)
else:
place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
deepfm = DeepFM(args)
test_filelist = [
os.path.join(args.test_data_dir, x)
for x in os.listdir(args.test_data_dir)
]
test_reader = data_reader.data_reader(
args.batch_size, test_filelist, args.feat_dict, data_type="test")
# load model
if args.checkpoint:
model_dict, optimizer_dict = fluid.dygraph.load_dygraph(
args.checkpoint)
deepfm.set_dict(model_dict)
logger.info("load model {} finished.".format(args.checkpoint))
else:
logger.error("no model to load!")
logger.error("please set model to load in --checkpoint first.")
exit(1)
def eval():
deepfm.eval()
logger.info("start eval model.")
total_step = 0
batch_begin = time.time()
auc_metric_test = fluid.metrics.Auc("ROC")
for data in test_reader():
total_step += 1
raw_feat_idx, raw_feat_value, label = zip(*data)
raw_feat_idx = np.array(raw_feat_idx, dtype=np.int64)
raw_feat_value = np.array(raw_feat_value, dtype=np.float32)
label = np.array(label, dtype=np.int64)
raw_feat_idx, raw_feat_value, label = [
to_variable(i)
for i in [raw_feat_idx, raw_feat_value, label]
]
predict = deepfm(raw_feat_idx, raw_feat_value, label)
# for auc
predict_2d = fluid.layers.concat([1 - predict, predict], 1)
auc_metric_test.update(
preds=predict_2d.numpy(), labels=label.numpy())
if total_step > 0 and total_step % 100 == 0:
logger.info(
"TEST --> batch: {} auc: {:.6f} speed: {:.2f} ins/s".
format(total_step,
auc_metric_test.eval(), 100 * args.batch_size / (
time.time() - batch_begin)))
batch_begin = time.time()
logger.info("test auc is %.6f" % auc_metric_test.eval())
begin = time.time()
eval()
logger.info("test finished, cost %f s" % (time.time() - begin))
place = paddle.CPUPlace()
paddle.disable_static(place)
deepfm = DeepFM(args)
test_filelist = [
os.path.join(args.test_data_dir, x)
for x in os.listdir(args.test_data_dir)
]
test_reader = data_reader.data_reader(
args.batch_size, test_filelist, args.feat_dict, data_type="test")
# load model
if args.checkpoint:
model_dict, optimizer_dict = paddle.fluid.dygraph.load_dygraph(
args.checkpoint)
deepfm.set_dict(model_dict)
logger.info("load model {} finished.".format(args.checkpoint))
else:
logger.error("no model to load!")
logger.error("please set model to load in --checkpoint first.")
exit(1)
def eval():
deepfm.eval()
logger.info("start eval model.")
total_step = 0
batch_begin = time.time()
auc_metric_test = paddle.fluid.metrics.Auc("ROC")
for data in test_reader():
total_step += 1
raw_feat_idx, raw_feat_value, label = zip(*data)
raw_feat_idx = np.array(raw_feat_idx, dtype=np.int64)
raw_feat_value = np.array(raw_feat_value, dtype=np.float32)
label = np.array(label, dtype=np.int64)
raw_feat_idx, raw_feat_value, label = [
paddle.to_tensor(
data=i, dtype=None, place=None, stop_gradient=True)
for i in [raw_feat_idx, raw_feat_value, label]
]
predict = deepfm(raw_feat_idx, raw_feat_value, label)
# for auc
predict_2d = paddle.concat(x=[1 - predict, predict], axis=1)
auc_metric_test.update(
preds=predict_2d.numpy(), labels=label.numpy())
if total_step > 0 and total_step % 100 == 0:
logger.info(
"TEST --> batch: {} auc: {:.6f} speed: {:.2f} ins/s".format(
total_step,
auc_metric_test.eval(), 100 * args.batch_size / (
time.time() - batch_begin)))
batch_begin = time.time()
logger.info("test auc is %.6f" % auc_metric_test.eval())
begin = time.time()
eval()
logger.info("test finished, cost %f s" % (time.time() - begin))
paddle.enable_static()
if __name__ == '__main__':
args = utils.parse_args()
utils.print_arguments(args)
......
......@@ -2,7 +2,6 @@ import math
import paddle
class DeepFM(paddle.nn.Layer):
def __init__(self, args):
super(DeepFM, self).__init__()
......@@ -13,8 +12,8 @@ class DeepFM(paddle.nn.Layer):
self.dnn = DNN(args)
def forward(self, raw_feat_idx, raw_feat_value, label):
feat_idx = paddle.fluid.layers.reshape(raw_feat_idx,
[-1, 1]) # (None * num_field) * 1
feat_idx = paddle.fluid.layers.reshape(
raw_feat_idx, [-1, 1]) # (None * num_field) * 1
feat_value = paddle.fluid.layers.reshape(
raw_feat_value,
[-1, self.args.num_field, 1]) # None * num_field * 1
......@@ -23,7 +22,8 @@ class DeepFM(paddle.nn.Layer):
feat_value)
y_dnn = self.dnn(feat_embeddings)
predict = paddle.nn.functional.sigmoid(y_first_order + y_second_order + y_dnn)
predict = paddle.nn.functional.sigmoid(y_first_order + y_second_order +
y_dnn)
return predict
......@@ -39,7 +39,7 @@ class FM(paddle.nn.Layer):
padding_idx=0,
param_attr=paddle.ParamAttr(
initializer=paddle.nn.initializer.TruncatedNormal(
loc=0.0, scale=self.init_value_),
mean=0.0, std=self.init_value_),
regularizer=paddle.fluid.regularizer.L1DecayRegularizer(
self.args.reg)))
self.embedding = paddle.fluid.dygraph.nn.Embedding(
......@@ -48,8 +48,8 @@ class FM(paddle.nn.Layer):
padding_idx=0,
param_attr=paddle.ParamAttr(
initializer=paddle.nn.initializer.TruncatedNormal(
loc=0.0,
scale=self.init_value_ /
mean=0.0,
std=self.init_value_ /
math.sqrt(float(self.args.embedding_size)))))
def forward(self, feat_idx, feat_value):
......@@ -69,16 +69,16 @@ class FM(paddle.nn.Layer):
feat_embeddings = feat_embeddings * feat_value # None * num_field * embedding_size
# sum_square part
summed_features_emb = paddle.reduce_sum(
feat_embeddings, 1) # None * embedding_size
summed_features_emb = paddle.reduce_sum(feat_embeddings,
1) # None * embedding_size
summed_features_emb_square = paddle.square(
summed_features_emb) # None * embedding_size
# square_sum part
squared_features_emb = paddle.square(
feat_embeddings) # None * num_field * embedding_size
squared_sum_features_emb = paddle.reduce_sum(
squared_features_emb, 1) # None * embedding_size
squared_sum_features_emb = paddle.reduce_sum(squared_features_emb,
1) # None * embedding_size
y_second_order = 0.5 * paddle.reduce_sum(
summed_features_emb_square - squared_sum_features_emb,
......@@ -93,7 +93,8 @@ class DNN(paddle.nn.Layer):
super(DNN, self).__init__()
self.args = args
self.init_value_ = 0.1
sizes = [self.args.num_field * self.args.embedding_size] + self.args.layer_sizes + [1]
sizes = [self.args.num_field * self.args.embedding_size
] + self.args.layer_sizes + [1]
acts = [self.args.act
for _ in range(len(self.args.layer_sizes))] + [None]
w_scales = [
......@@ -107,10 +108,10 @@ class DNN(paddle.nn.Layer):
out_features=sizes[i + 1],
weight_attr=paddle.ParamAttr(
initializer=paddle.nn.initializer.TruncatedNormal(
loc=0.0, scale=w_scales[i])),
mean=0.0, std=w_scales[i])),
bias_attr=paddle.ParamAttr(
initializer=paddle.nn.initializer.TruncatedNormal(
loc=0.0, scale=self.init_value_)))
mean=0.0, std=self.init_value_)))
#linear = getattr(paddle.nn.functional, acts[i])(linear) if acts[i] else linear
if acts[i] == 'relu':
act = paddle.nn.ReLU()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册