Unverified commit ccdbfe77, authored by overlordmax, committed by GitHub

Ncf 04291644 (#4585)

* fix bugs

* fix bugs

* add wide_deep

* fix code style

* fix code style

* fix some bugs

* fix filename

* add ncf

* add download data

* add download data
Parent 91cfa6f8
class Dataset(object):
    """Loads the held-out test ratings and the sampled negatives for each."""

    def __init__(self, path):
        self.testRatings = self.load_rating_file_as_list(path + ".test.rating")
        self.testNegatives = self.load_negative_file(path + ".test.negative")
        assert len(self.testRatings) == len(self.testNegatives)

    def load_rating_file_as_list(self, filename):
        # Each line of the .test.rating file starts with "user\titem".
        ratingList = []
        with open(filename, "r") as f:
            for line in f:
                if not line.strip():
                    continue
                arr = line.split("\t")
                user, item = int(arr[0]), int(arr[1])
                ratingList.append([user, item])
        return ratingList

    def load_negative_file(self, filename):
        # Each line of the .test.negative file lists the sampled negative
        # items after a first column identifying the (user, item) test case.
        negativeList = []
        with open(filename, "r") as f:
            for line in f:
                if not line.strip():
                    continue
                arr = line.split("\t")
                negatives = [int(x) for x in arr[1:]]
                negativeList.append(negatives)
        return negativeList
# NCF
The following is a brief overview of this example's directory structure:
```
├── README.md          # this document
├── requirements.txt   # required packages
├── gmf.py             # GMF network
├── mlp.py             # MLP network
├── neumf.py           # NeuMF network
├── create_data.sh     # script that generates the training data
├── Dataset.py         # test-set processing
├── get_train_data.py  # generates the training dataset
├── evaluate.py        # runs prediction and computes the metrics
├── train.py           # training entry point
├── infer.py           # prediction entry point
├── args.py            # command-line arguments
├── utils.py           # shared helpers
├── train_gpu.sh       # shell script for GPU training
├── train_cpu.sh       # shell script for CPU training
```
## Introduction
In many application scenarios there is no explicit feedback: most users are silent and never tell the system "this is how much I like this item", so a recommender has to infer preferences from the large volume of implicit feedback. The authors of [Neural Collaborative Filtering](https://arxiv.org/pdf/1708.05031.pdf) model user and item features with deep learning to give the model non-linear expressive power; concretely, they learn the user-item interaction function with a multi-layer perceptron and propose a collaborative filtering solution for implicit feedback.
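As a sketch of the model this example implements, the NeuMF network in neumf.py fuses a GMF branch and an MLP branch and scores a (user, item) pair as

$$\hat{y}_{ui} = \sigma\Big(h^\top \big[\, (p_u \odot q_i) \,\Vert\, \phi_{MLP}(p'_u \Vert q'_i) \,\big]\Big)$$

where $p_u, q_i$ (resp. $p'_u, q'_i$) are the user and item embeddings of the GMF (resp. MLP) branch, $\odot$ is the element-wise product, $\Vert$ is concatenation, and $\sigma$ is the sigmoid output layer. GMF and MLP alone are the two ablations selectable with --GMF and --MLP.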
## Environment
PaddlePaddle 1.7.0
python3.7
## Data download and preprocessing
Dataset: [Data.zip](https://paddlerec.bj.bcebos.com/ncf/Data.zip)
Set the file paths in the create_data.sh script and run it; it performs the following steps:
```sh
mkdir Data
pip install -r requirements.txt                            # install required packages
wget -P Data https://paddlerec.bj.bcebos.com/ncf/Data.zip  # download the dataset
unzip Data/Data.zip -d Data/
# generate the training data; --num_neg is the number of negative samples
# paired with each positive instance
python get_train_data.py --num_neg 4 \
                         --train_data_path "Data/train_data.csv"
```
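After this step, Data/train_data.csv holds one `user,item,label` triple per line: label 1 marks an observed interaction and label 0 a sampled negative. The rows below are illustrative values only:

```
0,32,1
0,1763,0
0,547,0
0,932,0
```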
## Single-machine training
GPU environment
Set the data paths and parameters in the train_gpu.sh script.
```sh
# NeuMF=1 combines the GMF and MLP branches; num_factors is the embedding
# size of the GMF branch; num_neg is the number of negative samples paired
# with each positive instance.
CUDA_VISIBLE_DEVICES=0 python train.py --use_gpu 1 \
                                       --NeuMF 1 \
                                       --epochs 20 \
                                       --batch_size 256 \
                                       --num_factors 8 \
                                       --num_neg 4 \
                                       --lr 0.001 \
                                       --model_dir 'model_dir'
```
Make the script executable, then run it:
```
chmod +x train_gpu.sh
./train_gpu.sh
```
CPU environment
Set the data paths and parameters in the train_cpu.sh script.
```sh
# Same options as the GPU script, but use_gpu=0 trains on CPU.
python train.py --use_gpu 0 \
                --NeuMF 1 \
                --epochs 20 \
                --batch_size 256 \
                --num_factors 8 \
                --num_neg 4 \
                --lr 0.001 \
                --model_dir 'model_dir'
```
Make the script executable, then run it:
```
chmod +x train_cpu.sh
./train_cpu.sh
```
## Single-machine prediction
Prediction runs in a CPU environment and is reasonably fast. infer.py evaluates the model saved at the last training epoch (model_dir/epoch_19 with the default arguments in args.py):
```
python infer.py
```
## Results
Training:
```
use_gpu:1, NeuMF:1, epochs:20, batch_size:256, num_factors:8, num_neg:4, lr:0.001, model_dir:model_dir, layers:[64, 32, 16, 8]
W0428 12:15:20.169631 1161 device_context.cc:237] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 10.1, Runtime API Version: 9.0
W0428 12:15:20.173840 1161 device_context.cc:245] device: 0, cuDNN Version: 7.3.
2020-04-28 12:15:21,945-INFO: epoch: 0, batch_id: 0, batch_time: 0.01069s, loss: 0.69115
2020-04-28 12:15:21,956-INFO: epoch: 0, batch_id: 1, batch_time: 0.00917s, loss: 0.68997
2020-04-28 12:15:21,976-INFO: epoch: 0, batch_id: 2, batch_time: 0.00901s, loss: 0.68813
...
2020-04-28 12:15:22,726-INFO: epoch: 0, batch_id: 72, batch_time: 0.00874s, loss: 0.44167
2020-04-28 12:15:22,736-INFO: epoch: 0, batch_id: 73, batch_time: 0.00862s, loss: 0.44800
2020-04-28 12:15:22,746-INFO: epoch: 0, batch_id: 74, batch_time: 0.00871s, loss: 0.43535
```
Prediction:
With epochs:20 and num_factors:8, HR@10 and NDCG@10 are compared against the paper:
This example:
```
2020-04-28 12:17:56,541-INFO: epoch: 20, epoch_time: 101.68907s, HR: 0.57268, NDCG: 0.32499
```
Paper:
```
HR: 0.688, NDCG: 0.410
```
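For reference, the two metrics as computed in evaluate.py: each test user is scored on the held-out positive item together with the sampled negatives from the .test.negative file (100 candidates in total, matching --test_batch_size 100). With $rank_u$ the 1-based position of the positive item in that ranking,

$$HR@K = \frac{1}{N}\sum_{u=1}^{N} \mathbb{1}[\,rank_u \le K\,], \qquad NDCG@K = \frac{1}{N}\sum_{u=1}^{N} \frac{\mathbb{1}[\,rank_u \le K\,]}{\log_2(rank_u + 1)}$$

so any hit in the top 10 counts for HR@10, while NDCG@10 additionally rewards ranking the positive item near the top.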
import argparse
def parse_args():
parser = argparse.ArgumentParser(description="Run GMF.")
parser.add_argument('--path', nargs='?', default='Data/', help='Input data path.')
parser.add_argument('--dataset', nargs='?', default='ml-1m', help='Choose a dataset.')
parser.add_argument('--epochs', type=int, default=20, help='Number of epochs.')
parser.add_argument('--batch_size', type=int, default=256, help='Batch size.')
    parser.add_argument('--test_batch_size', type=int, default=100, help='Test batch size.')
parser.add_argument('--num_factors', type=int, default=8, help='Embedding size.')
parser.add_argument('--num_users', type=int, default=6040, help='num_users')
    parser.add_argument('--num_items', type=int, default=3706, help='num_items')
parser.add_argument('--num_neg', type=int, default=4, help='Number of negative instances to pair with a positive instance.')
parser.add_argument('--lr', type=float, default=0.001, help='Learning rate.')
parser.add_argument('--train_data_path', type=str, default="Data/train_data.csv", help='train_data_path')
    parser.add_argument('--test_data_path', type=str, default="Data/test.txt", help='test_data_path')
parser.add_argument('--model_dir', type=str, default="model_dir", help='model_dir.')
parser.add_argument('--use_gpu', type=int, default=0, help='use_gpu')
parser.add_argument('--GMF', type=int, default=0, help='GMF')
parser.add_argument('--MLP', type=int, default=0, help='MLP')
parser.add_argument('--NeuMF', type=int, default=0, help='NeuMF')
    # NOTE: the default is a Python list; with nargs='?', a value passed on the
    # command line arrives as a single string, so edit the default here instead.
    parser.add_argument('--layers', nargs='?', default=[64, 32, 16, 8],
        help="MLP layers. Note that the first layer is the concatenation of the user and item embeddings, so layers[0]/2 is the embedding size.")
return parser.parse_args()
mkdir Data
pip install -r requirements.txt
wget -P Data https://paddlerec.bj.bcebos.com/ncf/Data.zip
unzip Data/Data.zip -d Data/
python get_train_data.py --num_neg 4 \
--train_data_path "Data/train_data.csv"
import math
import heapq  # for retrieving the top-K ranked items
import numpy as np
import paddle
import paddle.fluid as fluid
import utils

# Global variables shared between evaluate_model() and eval_one_rating()
_testRatings = None
_testNegatives = None
_K = None
_args = None
_model_path = None
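# Evaluation round-trip: eval_one_rating() writes the candidate (user, item)
# pairs of one test case to Data/test.txt, then run_infer() reloads the saved
# inference model and scores that file, one prediction per candidate.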
def run_infer(args, model_path, test_data_path):
test_data_generator = utils.CriteoDataset()
with fluid.scope_guard(fluid.Scope()):
test_reader = paddle.batch(test_data_generator.test(test_data_path, False), batch_size=args.test_batch_size)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(model_path, exe)
for data in test_reader():
user_input = np.array([dat[0] for dat in data])
item_input = np.array([dat[1] for dat in data])
pred_val = exe.run(infer_program,
feed={"user_input": user_input,
"item_input": item_input},
fetch_list=fetch_vars,
return_numpy=True)
return pred_val[0].reshape(1, -1).tolist()[0]
def evaluate_model(args, testRatings, testNegatives, K, model_path):
"""
Evaluate the performance (Hit_Ratio, NDCG) of top-K recommendation
Return: score of each test rating.
"""
    global _testRatings
    global _testNegatives
    global _K
    global _model_path
    global _args
    _args = args
    _model_path = model_path
    _testRatings = testRatings
    _testNegatives = testNegatives
    _K = K
hits, ndcgs = [],[]
for idx in range(len(_testRatings)):
(hr,ndcg) = eval_one_rating(idx)
hits.append(hr)
ndcgs.append(ndcg)
return (hits, ndcgs)
def eval_one_rating(idx):
rating = _testRatings[idx]
items = _testNegatives[idx]
u = rating[0]
gtItem = rating[1]
    items.append(gtItem)  # score the positive item together with its negatives
# Get prediction scores
map_item_score = {}
users = np.full(len(items), u, dtype = 'int32')
users = users.reshape(-1,1)
items_array = np.array(items).reshape(-1,1)
temp = np.hstack((users, items_array))
np.savetxt("Data/test.txt", temp, fmt='%d', delimiter=',')
predictions = run_infer(_args, _model_path, _args.test_data_path)
for i in range(len(items)):
item = items[i]
map_item_score[item] = predictions[i]
    items.pop()  # restore the shared list (items aliases _testNegatives[idx])
# Evaluate top rank list
ranklist = heapq.nlargest(_K, map_item_score, key=map_item_score.get)
hr = getHitRatio(ranklist, gtItem)
ndcg = getNDCG(ranklist, gtItem)
return (hr, ndcg)
def getHitRatio(ranklist, gtItem):
for item in ranklist:
if item == gtItem:
return 1
return 0
def getNDCG(ranklist, gtItem):
for i in range(len(ranklist)):
item = ranklist[i]
if item == gtItem:
return math.log(2) / math.log(i+2)
return 0
import scipy.sparse as sp
import numpy as np
import args


def get_train_data(filename, write_file, num_negatives):
    '''
    Read the .train.rating file and write a CSV of (user, item, label)
    triples: one positive instance per observed interaction plus
    num_negatives randomly sampled negative instances.
    '''
# Get number of users and items
num_users, num_items = 0, 0
with open(filename, "r") as f:
line = f.readline()
while line != None and line != "":
arr = line.split("\t")
u, i = int(arr[0]), int(arr[1])
num_users = max(num_users, u)
num_items = max(num_items, i)
line = f.readline()
# Construct matrix
mat = sp.dok_matrix((num_users+1, num_items+1), dtype=np.float32)
with open(filename, "r") as f:
line = f.readline()
while line != None and line != "":
arr = line.split("\t")
user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
if (rating > 0):
mat[user, item] = 1.0
line = f.readline()
    with open(write_file, 'w') as f:
        print("writing " + write_file)
        for (u, i) in mat.keys():
            # positive instance
            f.write("{0},{1},{2}\n".format(u, i, 1))
            # negative instances: sample items this user has not interacted
            # with (num_items holds the max item id, so sample [0, num_items])
            for t in range(num_negatives):
                j = np.random.randint(num_items + 1)
                while (u, j) in mat.keys():
                    j = np.random.randint(num_items + 1)
                f.write("{0},{1},{2}\n".format(u, j, 0))
if __name__ == "__main__":
args = args.parse_args()
get_train_data(args.path + args.dataset + ".train.rating", args.train_data_path, args.num_neg)
import paddle.fluid as fluid

class GMF(object):
def net(self, inputs, num_users, num_items, latent_dim):
MF_Embedding_User = fluid.embedding(input=inputs[0],
size=[num_users, latent_dim],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
MF_Embedding_Item = fluid.embedding(input=inputs[1],
size=[num_items, latent_dim],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
        predict_vector = fluid.layers.elementwise_mul(MF_Embedding_User, MF_Embedding_Item)  # GMF interaction: element-wise product of the embeddings
prediction = fluid.layers.fc(input=predict_vector,
size=1,
act='sigmoid',
param_attr=fluid.initializer.MSRAInitializer(uniform=True),
name='prediction')
cost = fluid.layers.log_loss(input=prediction, label=fluid.layers.cast(x=inputs[2], dtype='float32'))
avg_cost = fluid.layers.mean(cost)
return avg_cost, prediction
import numpy as np
import logging
import time

from Dataset import Dataset
from evaluate import evaluate_model
import args
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
if __name__ == "__main__":
args = args.parse_args()
dataset = Dataset(args.path + args.dataset)
testRatings, testNegatives = dataset.testRatings, dataset.testNegatives
topK = 10
begin = time.time()
model_path = args.model_dir + "/epoch_" + str(args.epochs - 1)
(hits, ndcgs) = evaluate_model(args, testRatings, testNegatives, topK, model_path)
hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
end = time.time()
logger.info("epoch: {}, epoch_time: {:.5f}s, HR: {:.5f}, NDCG: {:.5f}".format(args.epochs, end - begin, hr, ndcg))
import math

import paddle.fluid as fluid

class MLP(object):
def net(self, inputs, num_users, num_items, layers = [20, 10]):
num_layer = len(layers) #Number of layers in the MLP
MLP_Embedding_User = fluid.embedding(input=inputs[0],
size=[num_users, int(layers[0] / 2)],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
MLP_Embedding_Item = fluid.embedding(input=inputs[1],
size=[num_items, int(layers[0] / 2)],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
# The 0-th layer is the concatenation of embedding layers
vector = fluid.layers.concat(input=[MLP_Embedding_User, MLP_Embedding_Item], axis=-1)
for i in range(1, num_layer):
vector = fluid.layers.fc(input=vector,
size=layers[i],
act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0 / math.sqrt(vector.shape[1])),
regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4)),
name='layer_' + str(i))
# Final prediction layer
prediction = fluid.layers.fc(input=vector,
size=1,
act='sigmoid',
param_attr=fluid.initializer.MSRAInitializer(uniform=True),
name='prediction')
cost = fluid.layers.log_loss(input=prediction, label=fluid.layers.cast(x=inputs[2], dtype='float32'))
avg_cost = fluid.layers.mean(cost)
return avg_cost, prediction
import math

import paddle.fluid as fluid

class NeuMF(object):
def net(self, inputs, num_users, num_items, latent_dim, layers = [64,32,16,8]):
num_layer = len(layers) #Number of layers in the MLP
MF_Embedding_User = fluid.embedding(input=inputs[0],
size=[num_users, latent_dim],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
MF_Embedding_Item = fluid.embedding(input=inputs[1],
size=[num_items, latent_dim],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
MLP_Embedding_User = fluid.embedding(input=inputs[0],
size=[num_users, int(layers[0] / 2)],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
MLP_Embedding_Item = fluid.embedding(input=inputs[1],
size=[num_items, int(layers[0] / 2)],
param_attr=fluid.initializer.Normal(loc=0.0, scale=0.01),
is_sparse=True)
# MF part
mf_user_latent = fluid.layers.flatten(x=MF_Embedding_User, axis=1)
mf_item_latent = fluid.layers.flatten(x=MF_Embedding_Item, axis=1)
mf_vector = fluid.layers.elementwise_mul(mf_user_latent, mf_item_latent)
#fluid.layers.Print(mf_vector, message="mf_vector")
# MLP part
# The 0-th layer is the concatenation of embedding layers
mlp_user_latent = fluid.layers.flatten(x=MLP_Embedding_User, axis=1)
mlp_item_latent = fluid.layers.flatten(x=MLP_Embedding_Item, axis=1)
mlp_vector = fluid.layers.concat(input=[mlp_user_latent, mlp_item_latent], axis=-1)
#fluid.layers.Print(mlp_vector, message="mlp_vector")
for i in range(1, num_layer):
mlp_vector = fluid.layers.fc(input=mlp_vector,
size=layers[i],
act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0 / math.sqrt(mlp_vector.shape[1])),
regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4)),
name='layer_' + str(i))
# Concatenate MF and MLP parts
predict_vector = fluid.layers.concat(input=[mf_vector, mlp_vector], axis=-1)
# Final prediction layer
prediction = fluid.layers.fc(input=predict_vector,
size=1,
act='sigmoid',
param_attr=fluid.initializer.MSRAInitializer(uniform=True),
name='prediction')
cost = fluid.layers.log_loss(input=prediction, label=fluid.layers.cast(x=inputs[2], dtype='float32'))
avg_cost = fluid.layers.mean(cost)
return avg_cost, prediction
absl-py==0.8.1
aspy.yaml==1.3.0
attrs==19.2.0
audioread==2.1.8
backcall==0.1.0
bleach==3.1.0
cachetools==4.0.0
certifi==2019.9.11
cffi==1.14.0
cfgv==2.0.1
chardet==3.0.4
Click==7.0
cloudpickle==1.2.1
cma==2.7.0
colorlog==4.1.0
cycler==0.10.0
Cython==0.29
decorator==4.4.0
entrypoints==0.3
flake8==3.7.9
Flask==1.1.1
funcsigs==1.0.2
future==0.18.0
google-auth==1.10.0
google-auth-oauthlib==0.4.1
graphviz==0.13
grpcio==1.26.0
gunicorn==20.0.4
gym==0.12.1
h5py==2.9.0
identify==1.4.10
idna==2.8
imageio==2.6.1
imageio-ffmpeg==0.3.0
importlib-metadata==0.23
ipykernel==5.1.0
ipython==7.0.1
ipython-genutils==0.2.0
itsdangerous==1.1.0
jedi==0.15.1
jieba==0.42.1
Jinja2==2.10.1
joblib==0.14.1
jsonschema==3.1.1
jupyter-client==5.3.3
jupyter-core==4.5.0
kiwisolver==1.1.0
librosa==0.7.2
llvmlite==0.31.0
Markdown==3.1.1
MarkupSafe==1.1.1
matplotlib==2.2.3
mccabe==0.6.1
mistune==0.8.4
more-itertools==7.2.0
moviepy==1.0.1
nbconvert==5.3.1
nbformat==4.4.0
networkx==2.4
nltk==3.4.5
nodeenv==1.3.4
notebook==5.7.0
numba==0.48.0
numpy==1.16.4
oauthlib==3.1.0
objgraph==3.4.1
opencv-python==4.1.1.26
paddlehub==1.5.0
paddlepaddle-gpu==1.7.1.post97
pandas==0.23.4
pandocfilters==1.4.2
parl==1.1.2
parso==0.5.1
pexpect==4.7.0
pickleshare==0.7.5
Pillow==6.2.0
pre-commit==1.21.0
prettytable==0.7.2
proglog==0.1.9
prometheus-client==0.5.0
prompt-toolkit==2.0.10
protobuf==3.10.0
ptyprocess==0.6.0
pyarrow==0.13.0
pyasn1==0.4.8
pyasn1-modules==0.2.7
pycodestyle==2.5.0
pycparser==2.19
pyflakes==2.1.1
pyglet==1.4.5
Pygments==2.4.2
pyparsing==2.4.2
pyrsistent==0.15.4
python-dateutil==2.8.0
pytz==2019.3
PyYAML==5.1.2
pyzmq==18.0.1
rarfile==3.1
recordio==0.1.7
requests==2.22.0
requests-oauthlib==1.3.0
resampy==0.2.2
rsa==4.0
scikit-learn==0.20.0
scipy==1.3.0
seaborn==0.10.0
Send2Trash==1.5.0
sentencepiece==0.1.85
simplegeneric==0.8.1
six==1.12.0
sklearn==0.0
SoundFile==0.10.3.post1
tb-nightly==1.15.0a20190801
tb-paddle==0.3.6
tensorboard==2.1.0
tensorboardX==1.8
termcolor==1.1.0
terminado==0.8.2
testpath==0.4.2
toml==0.10.0
tornado==5.1.1
tqdm==4.36.1
traitlets==4.3.3
urllib3==1.25.6
virtualenv==16.7.9
visualdl==1.3.0
wcwidth==0.1.7
webencodings==0.5.1
Werkzeug==0.16.0
xgboost==1.0.1
yapf==0.26.0
zipp==0.6.0
import numpy as np
import logging
import time

import paddle
import paddle.fluid as fluid

from gmf import GMF
from mlp import MLP
from neumf import NeuMF
import args
import utils

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
def train(args, train_data_path):
print("use_gpu:{}, NeuMF:{}, epochs:{}, batch_size:{}, num_factors:{}, num_neg:{}, lr:{}, model_dir:{}, layers:{}".format(
args.use_gpu, args.NeuMF, args.epochs, args.batch_size, args.num_factors, args.num_neg, args.lr, args.model_dir, args.layers))
train_data_generator = utils.CriteoDataset()
train_reader = paddle.batch(train_data_generator.train(train_data_path, True), batch_size=args.batch_size)
inputs = utils.input_data(True)
    if args.GMF:
        model = GMF()
        loss, pred = model.net(inputs, args.num_users, args.num_items, args.num_factors)
    elif args.MLP:
        model = MLP()
        loss, pred = model.net(inputs, args.num_users, args.num_items, args.layers)
    elif args.NeuMF:
        model = NeuMF()
        loss, pred = model.net(inputs, args.num_users, args.num_items, args.num_factors, args.layers)
    else:
        raise ValueError("one of --GMF, --MLP or --NeuMF must be set to 1")
optimizer = fluid.optimizer.AdamOptimizer(args.lr)
optimizer.minimize(loss)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
feeder = fluid.DataFeeder(feed_list=inputs, place=place)
for epoch in range(args.epochs):
for batch_id, data in enumerate(train_reader()):
begin = time.time()
loss_val = exe.run(program=fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss.name],
return_numpy=True)
end = time.time()
logger.info("epoch: {}, batch_id: {}, batch_time: {:.5f}s, loss: {:.5f}".format(epoch, batch_id, end - begin, np.array(loss_val)[0][0]))
        # Save an inference model after every epoch; only user_input and
        # item_input are fed at inference time, so the label is not exported.
        save_dir = "%s/epoch_%d" % (args.model_dir, epoch)
        feed_var_names = ["user_input", "item_input"]
        fetch_vars = [pred]
        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
if __name__ == "__main__":
args = args.parse_args()
train(args, args.train_data_path)
python train.py --use_gpu 0 \
--NeuMF 1 \
--epochs 20 \
--batch_size 256 \
--num_factors 8 \
--num_neg 4 \
--lr 0.001 \
--model_dir 'model_dir'
CUDA_VISIBLE_DEVICES=0 python train.py --use_gpu 1 \
--NeuMF 1 \
--epochs 20 \
--batch_size 256 \
--num_factors 8 \
--num_neg 4 \
--lr 0.001 \
--model_dir 'model_dir'
import paddle.fluid as fluid


class CriteoDataset(object):
    # NOTE: despite its name, this reader consumes the "user,item[,label]"
    # CSV files produced by get_train_data.py and evaluate.py.
def _reader_creator(self, file, is_train):
def reader():
with open(file, 'r') as f:
for i,line in enumerate(f):
line = line.strip().split(',')
features = list(map(int, line))
output = []
output.append([features[0]])
output.append([features[1]])
if is_train:
output.append([features[2]])
yield output
return reader
def train(self, file, is_train):
return self._reader_creator(file, is_train)
def test(self, file, is_train):
return self._reader_creator(file, is_train)
def input_data(is_train):
    # Declare the network inputs: user id, item id and, for training, the label.
user_input = fluid.data(name="user_input", shape=[-1, 1], dtype="int64", lod_level=0)
item_input = fluid.data(name="item_input", shape=[-1, 1], dtype="int64", lod_level=0)
label = fluid.data(name="label", shape=[-1, 1], dtype="int64", lod_level=0)
if is_train:
inputs = [user_input] + [item_input] + [label]
else:
inputs = [user_input] + [item_input]
return inputs