未验证 提交 e4ad047a 编写于 作者: O overlordmax 提交者: GitHub

fix bug (#4706)

上级 0f38ae13
...@@ -48,5 +48,5 @@ def train(args, train_data_path): ...@@ -48,5 +48,5 @@ def train(args, train_data_path):
if __name__ == "__main__": if __name__ == "__main__":
args = args.parse_args() args = args.parse_args()
logger.info("epoch:{}, batch_size: {}, use_gpu: {}, train_data_path: {}, model_dir: {}, hidden1_units: {}, hidden2_units: {}, hidden3_units: {}".format( logger.info("epoch:{}, batch_size: {}, use_gpu: {}, train_data_path: {}, model_dir: {}, hidden1_units: {}, hidden2_units: {}, hidden3_units: {}".format(
args.epoch, args.batch_size, args.use_gpu, args.train_data_path, args.model_dir, args.hidden1_units, args.hidden2_units, args.hidden3_units)) args.epochs, args.batch_size, args.use_gpu, args.train_data_path, args.model_dir, args.hidden1_units, args.hidden2_units, args.hidden3_units))
train(args, args.train_data_path) train(args, args.train_data_path)
...@@ -6,6 +6,7 @@ def parse_args(): ...@@ -6,6 +6,7 @@ def parse_args():
parser.add_argument('--dataset', nargs='?', default='ml-1m', help='Choose a dataset.') parser.add_argument('--dataset', nargs='?', default='ml-1m', help='Choose a dataset.')
parser.add_argument('--epochs', type=int, default=20, help='Number of epochs.') parser.add_argument('--epochs', type=int, default=20, help='Number of epochs.')
parser.add_argument('--batch_size', type=int, default=256, help='Batch size.') parser.add_argument('--batch_size', type=int, default=256, help='Batch size.')
parser.add_argument('--test_epoch', type=str, default='19',help='test_epoch')
parser.add_argument('--test_batch_size', type=int, default=100, help='Batch size.') parser.add_argument('--test_batch_size', type=int, default=100, help='Batch size.')
parser.add_argument('--num_factors', type=int, default=8, help='Embedding size.') parser.add_argument('--num_factors', type=int, default=8, help='Embedding size.')
parser.add_argument('--num_users', type=int, default=6040, help='num_users') parser.add_argument('--num_users', type=int, default=6040, help='num_users')
......
import math import math
import heapq # for retrieval topK import heapq # for retrieval topK
import multiprocessing import multiprocessing
import numpy as np import numpy as np
from time import time from time import time
...@@ -23,36 +23,30 @@ _K = None ...@@ -23,36 +23,30 @@ _K = None
_args = None _args = None
_model_path = None _model_path = None
def run_infer(args, model_path, test_data_path): def run_infer(args, model_path, test_data_path):
test_data_generator = utils.Dataset() test_data_generator = utils.Dataset()
with fluid.scope_guard(fluid.Scope()): with fluid.scope_guard(fluid.Scope()):
test_reader = fluid.io.batch( test_reader = fluid.io.batch(test_data_generator.test(test_data_path, False), batch_size=args.test_batch_size)
test_data_generator.test(test_data_path, False),
batch_size=args.test_batch_size)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model( infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(model_path, exe)
model_path, exe)
for data in test_reader(): for data in test_reader():
user_input = np.array([dat[0] for dat in data]) user_input = np.array([dat[0] for dat in data])
item_input = np.array([dat[1] for dat in data]) item_input = np.array([dat[1] for dat in data])
pred_val = exe.run( pred_val = exe.run(infer_program,
infer_program, feed={"user_input": user_input,
feed={"user_input": user_input, "item_input": item_input},
"item_input": item_input}, fetch_list=fetch_vars,
fetch_list=fetch_vars, return_numpy=True)
return_numpy=True)
return pred_val[0].reshape(1, -1).tolist()[0] return pred_val[0].reshape(1, -1).tolist()[0]
def evaluate_model(args, testRatings, testNegatives, K, model_path): def evaluate_model(args, testRatings, testNegatives, K, model_path):
""" """
Evaluate the performance (Hit_Ratio, NDCG) of top-K recommendation Evaluate the performance (Hit_Ratio, NDCG) of top-K recommendation
...@@ -62,23 +56,22 @@ def evaluate_model(args, testRatings, testNegatives, K, model_path): ...@@ -62,23 +56,22 @@ def evaluate_model(args, testRatings, testNegatives, K, model_path):
global _testRatings global _testRatings
global _testNegatives global _testNegatives
global _K global _K
global _model_path global _model_path
global _args global _args
_args = args _args = args
_model_path = model_path _model_path= model_path
_testRatings = testRatings _testRatings = testRatings
_testNegatives = testNegatives _testNegatives = testNegatives
_K = K _K = K
hits, ndcgs = [], [] hits, ndcgs = [],[]
for idx in range(len(_testRatings)): for idx in range(len(_testRatings)):
(hr, ndcg) = eval_one_rating(idx) (hr,ndcg) = eval_one_rating(idx)
hits.append(hr) hits.append(hr)
ndcgs.append(ndcg) ndcgs.append(ndcg)
return (hits, ndcgs) return (hits, ndcgs)
def eval_one_rating(idx): def eval_one_rating(idx):
rating = _testRatings[idx] rating = _testRatings[idx]
items = _testNegatives[idx] items = _testNegatives[idx]
...@@ -87,9 +80,9 @@ def eval_one_rating(idx): ...@@ -87,9 +80,9 @@ def eval_one_rating(idx):
items.append(gtItem) items.append(gtItem)
# Get prediction scores # Get prediction scores
map_item_score = {} map_item_score = {}
users = np.full(len(items), u, dtype='int32') users = np.full(len(items), u, dtype = 'int32')
users = users.reshape(-1, 1) users = users.reshape(-1,1)
items_array = np.array(items).reshape(-1, 1) items_array = np.array(items).reshape(-1,1)
temp = np.hstack((users, items_array)) temp = np.hstack((users, items_array))
np.savetxt("Data/test.txt", temp, fmt='%d', delimiter=',') np.savetxt("Data/test.txt", temp, fmt='%d', delimiter=',')
predictions = run_infer(_args, _model_path, _args.test_data_path) predictions = run_infer(_args, _model_path, _args.test_data_path)
...@@ -98,7 +91,7 @@ def eval_one_rating(idx): ...@@ -98,7 +91,7 @@ def eval_one_rating(idx):
item = items[i] item = items[i]
map_item_score[item] = predictions[i] map_item_score[item] = predictions[i]
items.pop() items.pop()
# Evaluate top rank list # Evaluate top rank list
ranklist = heapq.nlargest(_K, map_item_score, key=map_item_score.get) ranklist = heapq.nlargest(_K, map_item_score, key=map_item_score.get)
hr = getHitRatio(ranklist, gtItem) hr = getHitRatio(ranklist, gtItem)
...@@ -106,17 +99,15 @@ def eval_one_rating(idx): ...@@ -106,17 +99,15 @@ def eval_one_rating(idx):
return (hr, ndcg) return (hr, ndcg)
def getHitRatio(ranklist, gtItem): def getHitRatio(ranklist, gtItem):
for item in ranklist: for item in ranklist:
if item == gtItem: if item == gtItem:
return 1 return 1
return 0 return 0
def getNDCG(ranklist, gtItem): def getNDCG(ranklist, gtItem):
for i in range(len(ranklist)): for i in range(len(ranklist)):
item = ranklist[i] item = ranklist[i]
if item == gtItem: if item == gtItem:
return math.log(2) / math.log(i + 2) return math.log(2) / math.log(i+2)
return 0 return 0
...@@ -32,7 +32,7 @@ def get_train_data(filename, write_file, num_negatives): ...@@ -32,7 +32,7 @@ def get_train_data(filename, write_file, num_negatives):
file = open(write_file, 'w') file = open(write_file, 'w')
print("writing " + write_file) print("writing " + write_file)
for (u, i) in mat: for (u, i) in mat.keys():
# positive instance # positive instance
user_input = str(u) user_input = str(u)
item_input = str(i) item_input = str(i)
......
...@@ -23,7 +23,7 @@ if __name__ == "__main__": ...@@ -23,7 +23,7 @@ if __name__ == "__main__":
topK = 10 topK = 10
begin = time.time() begin = time.time()
model_path = args.model_dir + "/epoch_" + str(12) model_path = args.model_dir + "/epoch_" + args.test_epoch
(hits, ndcgs) = evaluate_model(args, testRatings, testNegatives, topK, model_path) (hits, ndcgs) = evaluate_model(args, testRatings, testNegatives, topK, model_path)
hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean() hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
end = time.time() end = time.time()
......
...@@ -27,20 +27,6 @@ ...@@ -27,20 +27,6 @@
python3.7 python3.7
## 数据集说明
本项目构造数据集验证模型的正确性,字段说明如下:
user_slot_name:用户端特征群id
item_slot_name:item端特征群id
length:item的长度
label:用户是否点击给定item的list
注意:由于构造数据集的限制,本项目只用一个epoch,如果多个epoch,则多个epoch的数据是变化的,没有意义,因此只采用一个epoch。
## 单机训练 ## 单机训练
GPU环境 GPU环境
......
...@@ -22,7 +22,7 @@ import sys ...@@ -22,7 +22,7 @@ import sys
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--epochs", type=int, default=1, help="epochs") parser.add_argument("--epochs", type=int, default=20, help="epochs")
parser.add_argument("--batch_size", type=int, default=32, help="batch_size") parser.add_argument("--batch_size", type=int, default=32, help="batch_size")
parser.add_argument("--test_epoch", type=int, default=1, help="test_epoch") parser.add_argument("--test_epoch", type=int, default=1, help="test_epoch")
parser.add_argument('--use_gpu', type=int, default=0, help='whether using gpu') parser.add_argument('--use_gpu', type=int, default=0, help='whether using gpu')
......
...@@ -51,17 +51,17 @@ def train(args): ...@@ -51,17 +51,17 @@ def train(args):
train_reader = fluid.io.batch(train_data_generator.get_train_data(), batch_size=args.batch_size) train_reader = fluid.io.batch(train_data_generator.get_train_data(), batch_size=args.batch_size)
loader = fluid.io.DataLoader.from_generator(feed_list=inputs, capacity=args.batch_size, iterable=True) loader = fluid.io.DataLoader.from_generator(feed_list=inputs, capacity=args.batch_size, iterable=True)
loader.set_sample_list_generator(train_reader, places=place) loader.set_sample_list_generator(train_reader, places=place)
for epoch in range(args.epochs):
for i in range(args.sample_size): for i in range(args.sample_size):
for batch_id, data in enumerate(loader()): for batch_id, data in enumerate(loader()):
begin = time.time() begin = time.time()
loss_val, auc = exe.run(program=fluid.default_main_program(), loss_val, auc = exe.run(program=fluid.default_main_program(),
feed=data, feed=data,
fetch_list=[loss.name, auc_val], fetch_list=[loss.name, auc_val],
return_numpy=True) return_numpy=True)
end = time.time() end = time.time()
logger.info("epoch: {}, batch_id: {}, batch_time: {:.5f}s, loss: {:.5f}, auc: {:.5f}".format( logger.info("batch_id: {}, batch_time: {:.5f}s, loss: {:.5f}, auc: {:.5f}".format(
epoch, batch_id, end-begin, float(np.array(loss_val)), float(np.array(auc)))) batch_id, end-begin, float(np.array(loss_val)), float(np.array(auc))))
#save model #save model
model_dir = os.path.join(args.model_dir, 'epoch_' + str(1), "checkpoint") model_dir = os.path.join(args.model_dir, 'epoch_' + str(1), "checkpoint")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册