未验证 提交 e4ad047a 编写于 作者: O overlordmax 提交者: GitHub

fix bug (#4706)

上级 0f38ae13
......@@ -48,5 +48,5 @@ def train(args, train_data_path):
if __name__ == "__main__":
args = args.parse_args()
logger.info("epoch:{}, batch_size: {}, use_gpu: {}, train_data_path: {}, model_dir: {}, hidden1_units: {}, hidden2_units: {}, hidden3_units: {}".format(
args.epoch, args.batch_size, args.use_gpu, args.train_data_path, args.model_dir, args.hidden1_units, args.hidden2_units, args.hidden3_units))
args.epochs, args.batch_size, args.use_gpu, args.train_data_path, args.model_dir, args.hidden1_units, args.hidden2_units, args.hidden3_units))
train(args, args.train_data_path)
......@@ -6,6 +6,7 @@ def parse_args():
parser.add_argument('--dataset', nargs='?', default='ml-1m', help='Choose a dataset.')
parser.add_argument('--epochs', type=int, default=20, help='Number of epochs.')
parser.add_argument('--batch_size', type=int, default=256, help='Batch size.')
parser.add_argument('--test_epoch', type=str, default='19',help='test_epoch')
parser.add_argument('--test_batch_size', type=int, default=100, help='Batch size.')
parser.add_argument('--num_factors', type=int, default=8, help='Embedding size.')
parser.add_argument('--num_users', type=int, default=6040, help='num_users')
......
import math
import heapq # for retrieval topK
import heapq # for retrieval topK
import multiprocessing
import numpy as np
from time import time
......@@ -23,36 +23,30 @@ _K = None
_args = None
_model_path = None
# NOTE(review): this span is a rendered commit diff, not runnable source — the
# pre-change and post-change versions of several statements appear back to back
# (e.g. the two `test_reader = ...` assignments and the two `exe.run(...)` calls),
# and the original indentation has been flattened by the page extraction.
# The comments below describe the intended (post-change) logic; confirm against
# the repository's actual file before relying on them.
def run_infer(args, model_path, test_data_path):
# Build a batched reader over the test split; `False` presumably disables
# shuffling — TODO confirm against utils.Dataset.test.
test_data_generator = utils.Dataset()
# Run inference inside a fresh scope so loaded variables don't collide with
# any existing program state.
with fluid.scope_guard(fluid.Scope()):
# Pre-change form of the reader construction (kept by the diff rendering):
test_reader = fluid.io.batch(
test_data_generator.test(test_data_path, False),
batch_size=args.test_batch_size)
# Post-change form: same call collapsed onto one line.
test_reader = fluid.io.batch(test_data_generator.test(test_data_path, False), batch_size=args.test_batch_size)
# Select device from the CLI flag and initialize the executor.
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# Pre-change form of loading the saved inference program:
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(
model_path, exe)
# Post-change form: single-line call.
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(model_path, exe)
# Feed each batch as (user, item) id arrays; column 0 is the user id and
# column 1 the item id — TODO confirm reader row layout.
for data in test_reader():
user_input = np.array([dat[0] for dat in data])
item_input = np.array([dat[1] for dat in data])
# Pre-change form of the inference call:
pred_val = exe.run(
infer_program,
feed={"user_input": user_input,
"item_input": item_input},
fetch_list=fetch_vars,
return_numpy=True)
# Post-change form: identical arguments, different line layout.
pred_val = exe.run(infer_program,
feed={"user_input": user_input,
"item_input": item_input},
fetch_list=fetch_vars,
return_numpy=True)
# Returns the first fetched tensor of the FIRST batch flattened to a plain
# Python list — note the `return` sits inside the loop, so only one batch
# is ever scored per call.
return pred_val[0].reshape(1, -1).tolist()[0]
def evaluate_model(args, testRatings, testNegatives, K, model_path):
"""
Evaluate the performance (Hit_Ratio, NDCG) of top-K recommendation
......@@ -62,23 +56,22 @@ def evaluate_model(args, testRatings, testNegatives, K, model_path):
global _testRatings
global _testNegatives
global _K
global _model_path
global _model_path
global _args
_args = args
_model_path = model_path
_model_path= model_path
_testRatings = testRatings
_testNegatives = testNegatives
_K = K
hits, ndcgs = [], []
hits, ndcgs = [],[]
for idx in range(len(_testRatings)):
(hr, ndcg) = eval_one_rating(idx)
(hr,ndcg) = eval_one_rating(idx)
hits.append(hr)
ndcgs.append(ndcg)
ndcgs.append(ndcg)
return (hits, ndcgs)
def eval_one_rating(idx):
rating = _testRatings[idx]
items = _testNegatives[idx]
......@@ -87,9 +80,9 @@ def eval_one_rating(idx):
items.append(gtItem)
# Get prediction scores
map_item_score = {}
users = np.full(len(items), u, dtype='int32')
users = users.reshape(-1, 1)
items_array = np.array(items).reshape(-1, 1)
users = np.full(len(items), u, dtype = 'int32')
users = users.reshape(-1,1)
items_array = np.array(items).reshape(-1,1)
temp = np.hstack((users, items_array))
np.savetxt("Data/test.txt", temp, fmt='%d', delimiter=',')
predictions = run_infer(_args, _model_path, _args.test_data_path)
......@@ -98,7 +91,7 @@ def eval_one_rating(idx):
item = items[i]
map_item_score[item] = predictions[i]
items.pop()
# Evaluate top rank list
ranklist = heapq.nlargest(_K, map_item_score, key=map_item_score.get)
hr = getHitRatio(ranklist, gtItem)
......@@ -106,17 +99,15 @@ def eval_one_rating(idx):
return (hr, ndcg)
def getHitRatio(ranklist, gtItem):
    """Hit@K indicator: 1 if the ground-truth item is in the ranked list, else 0."""
    return 1 if gtItem in ranklist else 0
def getNDCG(ranklist, gtItem):
    """NDCG@K for a single relevant item.

    With exactly one ground-truth item, NDCG reduces to the discounted gain
    at its 0-based position ``i``: ``log(2) / log(i + 2)`` (i.e. ``1 /
    log2(i + 2)``).  Returns 0 when the item is absent from the list.

    Note: the diff rendering had duplicated the return statement (pre- and
    post-change spacing variants); this keeps a single copy.
    """
    for i, item in enumerate(ranklist):
        if item == gtItem:
            return math.log(2) / math.log(i + 2)
    return 0
......@@ -32,7 +32,7 @@ def get_train_data(filename, write_file, num_negatives):
file = open(write_file, 'w')
print("writing " + write_file)
for (u, i) in mat:
for (u, i) in mat.keys():
# positive instance
user_input = str(u)
item_input = str(i)
......
......@@ -23,7 +23,7 @@ if __name__ == "__main__":
topK = 10
begin = time.time()
model_path = args.model_dir + "/epoch_" + str(12)
model_path = args.model_dir + "/epoch_" + args.test_epoch
(hits, ndcgs) = evaluate_model(args, testRatings, testNegatives, topK, model_path)
hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
end = time.time()
......
......@@ -27,20 +27,6 @@
python3.7
## 数据集说明
本项目构造数据集验证模型的正确性,字段说明如下:
user_slot_name:用户端特征群id
item_slot_name:item端特征群id
length:item的长度
label:用户对给定的是否点击item的list
注意:由于构造数据集的限制,本项目只用一个epoch,如果多个epoch,则多个epoch的数据是变化的,没有意义,因此只采用一个epoch。
## 单机训练
GPU环境
......
......@@ -22,7 +22,7 @@ import sys
def parse_args():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--epochs", type=int, default=1, help="epochs")
parser.add_argument("--epochs", type=int, default=20, help="epochs")
parser.add_argument("--batch_size", type=int, default=32, help="batch_size")
parser.add_argument("--test_epoch", type=int, default=1, help="test_epoch")
parser.add_argument('--use_gpu', type=int, default=0, help='whether using gpu')
......
......@@ -51,17 +51,17 @@ def train(args):
train_reader = fluid.io.batch(train_data_generator.get_train_data(), batch_size=args.batch_size)
loader = fluid.io.DataLoader.from_generator(feed_list=inputs, capacity=args.batch_size, iterable=True)
loader.set_sample_list_generator(train_reader, places=place)
for epoch in range(args.epochs):
for i in range(args.sample_size):
for batch_id, data in enumerate(loader()):
begin = time.time()
loss_val, auc = exe.run(program=fluid.default_main_program(),
feed=data,
fetch_list=[loss.name, auc_val],
return_numpy=True)
end = time.time()
logger.info("epoch: {}, batch_id: {}, batch_time: {:.5f}s, loss: {:.5f}, auc: {:.5f}".format(
epoch, batch_id, end-begin, float(np.array(loss_val)), float(np.array(auc))))
for i in range(args.sample_size):
for batch_id, data in enumerate(loader()):
begin = time.time()
loss_val, auc = exe.run(program=fluid.default_main_program(),
feed=data,
fetch_list=[loss.name, auc_val],
return_numpy=True)
end = time.time()
logger.info("batch_id: {}, batch_time: {:.5f}s, loss: {:.5f}, auc: {:.5f}".format(
batch_id, end-begin, float(np.array(loss_val)), float(np.array(auc))))
#save model
model_dir = os.path.join(args.model_dir, 'epoch_' + str(1), "checkpoint")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册