Unverified commit ce853f67, authored by hutuxian, committed by GitHub

SR-GNN: some optimizations (#2676)

reader: use all data (no longer discard the tail batch)
network:
    support a variable batch_size
    add shape comments
infer:
    use py_reader
Parent 41b772c8
infer.py
@@ -19,6 +19,7 @@ import os
 import paddle
 import paddle.fluid as fluid
 import reader
+import network

 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger("fluid")
@@ -31,6 +32,8 @@ def parse_args():
         '--model_path', type=str, default='./saved_model/', help="path of model parameters")
     parser.add_argument(
         '--test_path', type=str, default='./data/diginetica/test.txt', help='dir of test file')
+    parser.add_argument(
+        '--config_path', type=str, default='./data/diginetica/config.txt', help='dir of config')
     parser.add_argument(
         '--use_cuda', type=int, default=1, help='whether to use gpu')
     parser.add_argument(
@@ -39,40 +42,51 @@ def parse_args():
         '--start_index', type=int, default='0', help='start index')
     parser.add_argument(
         '--last_index', type=int, default='10', help='end index')
+    parser.add_argument(
+        '--hidden_size', type=int, default=100, help='hidden state size')
+    parser.add_argument(
+        '--step', type=int, default=1, help='gnn propagation steps')
     return parser.parse_args()


-def infer(epoch_num):
-    args = parse_args()
+def infer(args):
     batch_size = args.batch_size
+    items_num = reader.read_config(args.config_path)
     test_data = reader.Data(args.test_path, False)
     place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
+    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, args.step)
+    exe.run(fluid.default_startup_program())
+    infer_program = fluid.default_main_program().clone(for_test=True)

-    model_path = args.model_path + "epoch_" + str(epoch_num)
-    try:
-        [infer_program, feed_names, fetch_targets] = fluid.io.load_inference_model(
-            model_path, exe)
-        feeder = fluid.DataFeeder(
-            feed_list=feed_names, place=place, program=infer_program)
-
-        loss_sum = 0.0
-        acc_sum = 0.0
-        count = 0
-        for data in test_data.reader(batch_size, batch_size, False)():
-            res = exe.run(infer_program,
-                          feed=feeder.feed(data),
-                          fetch_list=fetch_targets)
-            loss_sum += res[0]
-            acc_sum += res[1]
-            count += 1
-        logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
-                    (loss_sum / count, acc_sum / count))
-    except ValueError as e:
-        logger.info("TEST --> error: there is no model in " + model_path)
+    for epoch_num in range(args.start_index, args.last_index + 1):
+        model_path = args.model_path + "epoch_" + str(epoch_num)
+        try:
+            if not os.path.exists(model_path):
+                raise ValueError()
+            fluid.io.load_persistables(executor=exe, dirname=model_path,
+                                       main_program=infer_program)
+
+            loss_sum = 0.0
+            acc_sum = 0.0
+            count = 0
+            py_reader.decorate_paddle_reader(
+                test_data.reader(batch_size, batch_size * 20, False))
+            py_reader.start()
+            try:
+                while True:
+                    res = exe.run(infer_program,
+                                  fetch_list=[loss.name, acc.name],
+                                  use_program_cache=True)
+                    loss_sum += res[0]
+                    acc_sum += res[1]
+                    count += 1
+            except fluid.core.EOFException:
+                py_reader.reset()
+            logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
+                        (loss_sum / count, acc_sum / count))
+        except ValueError as e:
+            logger.info("TEST --> error: there is no model in " + model_path)


 if __name__ == "__main__":
     args = parse_args()
-    for index in range(args.start_index, args.last_index + 1):
-        infer(index)
+    infer(args)
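The core of the change above is the fluid py_reader loop: decorate a batched reader, start it, run the cloned inference program until the queue raises EOFException, then reset. Below is a minimal, self-contained sketch of that same pattern, assuming the PaddlePaddle 1.x fluid API; the toy network, the data source, and names such as demo_reader are hypothetical stand-ins, not part of the commit.

import numpy as np
import paddle
import paddle.fluid as fluid

# Toy graph: one feature, one target, both fed through the py_reader.
x = fluid.layers.data(name="x", shape=[1], dtype="float32")
y = fluid.layers.data(name="y", shape=[1], dtype="float32")
py_reader = fluid.layers.create_py_reader_by_data(
    capacity=64, feed_list=[x, y], name="demo_reader", use_double_buffer=False)
x, y = fluid.layers.read_file(py_reader)
loss = fluid.layers.reduce_mean(fluid.layers.square(x - y))

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
infer_program = fluid.default_main_program().clone(for_test=True)

def sample_reader():  # hypothetical stand-in for reader.Data
    for _ in range(100):
        yield [np.random.rand()], [np.random.rand()]

# Same control flow as infer() above: decorate, start, drain, reset.
py_reader.decorate_paddle_reader(paddle.batch(sample_reader, batch_size=8))
py_reader.start()
loss_sum, count = 0.0, 0
try:
    while True:
        res = exe.run(infer_program, fetch_list=[loss.name])
        loss_sum += float(res[0])
        count += 1
except fluid.core.EOFException:  # raised once every batch has been consumed
    py_reader.reset()
print("mean loss over %d batches: %.4f" % (count, loss_sum / count))

Compared with the old DataFeeder loop, a background thread keeps the device queue filled, which is the usual motivation for moving inference onto py_reader as well.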
network.py
@@ -19,44 +19,37 @@ import paddle.fluid as fluid
 import paddle.fluid.layers as layers


-def network(batch_size, items_num, hidden_size, step):
+def network(items_num, hidden_size, step):
     stdv = 1.0 / math.sqrt(hidden_size)

     items = layers.data(
         name="items",
-        shape=[batch_size, items_num, 1],
-        dtype="int64",
-        append_batch_size=False)  #[bs, uniq_max, 1]
+        shape=[1, 1],
+        dtype="int64")  #[batch_size, uniq_max, 1]
     seq_index = layers.data(
         name="seq_index",
-        shape=[batch_size, items_num],
-        dtype="int32",
-        append_batch_size=False)  #[-1(seq_max)*batch_size, 1]
+        shape=[1],
+        dtype="int32")  #[batch_size, seq_max]
     last_index = layers.data(
         name="last_index",
-        shape=[batch_size],
-        dtype="int32",
-        append_batch_size=False)  #[batch_size, 1]
+        shape=[1],
+        dtype="int32")  #[batch_size, 1]
     adj_in = layers.data(
         name="adj_in",
-        shape=[batch_size, items_num, items_num],
-        dtype="float32",
-        append_batch_size=False)
+        shape=[1, 1],
+        dtype="float32")  #[batch_size, seq_max, seq_max]
     adj_out = layers.data(
         name="adj_out",
-        shape=[batch_size, items_num, items_num],
-        dtype="float32",
-        append_batch_size=False)
+        shape=[1, 1],
+        dtype="float32")  #[batch_size, seq_max, seq_max]
     mask = layers.data(
         name="mask",
-        shape=[batch_size, -1, 1],
-        dtype="float32",
-        append_batch_size=False)
+        shape=[1, 1],
+        dtype="float32")  #[batch_size, seq_max, 1]
     label = layers.data(
         name="label",
-        shape=[batch_size, 1],
-        dtype="int64",
-        append_batch_size=False)
+        shape=[1],
+        dtype="int64")  #[batch_size, 1]

     datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
     py_reader = fluid.layers.create_py_reader_by_data(
@@ -71,11 +64,12 @@ def network(batch_size, items_num, hidden_size, step):
             initializer=fluid.initializer.Uniform(
                 low=-stdv, high=stdv)),
         size=[items_num, hidden_size])  #[batch_size, uniq_max, h]
+    items_emb_shape = layers.shape(items_emb)

     pre_state = items_emb
     for i in range(step):
         pre_state = layers.reshape(
-            x=pre_state, shape=[batch_size, -1, hidden_size])
+            x=pre_state, shape=[-1, 1, hidden_size], actual_shape=items_emb_shape)
         state_in = layers.fc(
             input=pre_state,
             name="state_in",
@@ -114,15 +108,20 @@ def network(batch_size, items_num, hidden_size, step):
                 x=pre_state, shape=[-1, hidden_size]),
             size=3 * hidden_size)

-    final_state = pre_state
+    final_state = pre_state  #[batch_size * uniq_max, h]
+    seq_origin_shape = layers.assign(
+        np.array([0, 0, hidden_size - 1]).astype("int32"))
+    seq_origin_shape += layers.shape(
+        layers.unsqueeze(seq_index, [2]))  #value: [batch_size, seq_max, h]
+    seq_origin_shape.stop_gradient = True
     seq_index = layers.reshape(seq_index, shape=[-1])
-    seq = layers.gather(final_state, seq_index)  #[batch_size*-1(seq_max), h]
+    seq = layers.gather(final_state, seq_index)  #[batch_size * seq_max, h]
     last = layers.gather(final_state, last_index)  #[batch_size, h]

     seq = layers.reshape(
-        seq, shape=[batch_size, -1, hidden_size])  #[batch_size, -1(seq_max), h]
+        seq, shape=[-1, 1, hidden_size],
+        actual_shape=seq_origin_shape)  #[batch_size, seq_max, h]
     last = layers.reshape(
-        last, shape=[batch_size, hidden_size])  #[batch_size, h]
+        last, shape=[-1, hidden_size])  #[batch_size, h]

     seq_fc = layers.fc(
         input=seq,
@@ -133,7 +132,7 @@ def network(batch_size, items_num, hidden_size, step):
         num_flatten_dims=2,
         param_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Uniform(
-                low=-stdv, high=stdv)))  #[batch_size, -1(seq_max), h]
+                low=-stdv, high=stdv)))  #[batch_size, seq_max, h]
     last_fc = layers.fc(
         input=last,
         name="last_fc",
@@ -146,18 +145,18 @@ def network(batch_size, items_num, hidden_size, step):
             low=-stdv, high=stdv)))  #[batch_size, h]

     seq_fc_t = layers.transpose(
-        seq_fc, perm=[1, 0, 2])  #[-1(seq_max), batch_size, h]
+        seq_fc, perm=[1, 0, 2])  #[seq_max, batch_size, h]
     add = layers.elementwise_add(
-        seq_fc_t, last_fc)  #[-1(seq_max), batch_size, h]
+        seq_fc_t, last_fc)  #[seq_max, batch_size, h]
     b = layers.create_parameter(
         shape=[hidden_size],
         dtype='float32',
         default_initializer=fluid.initializer.Constant(value=0.0))  #[h]
-    add = layers.elementwise_add(add, b)  #[-1(seq_max), batch_size, h]
-    add_sigmoid = layers.sigmoid(add)  #[-1(seq_max), batch_size, h]
+    add = layers.elementwise_add(add, b)  #[seq_max, batch_size, h]
+    add_sigmoid = layers.sigmoid(add)  #[seq_max, batch_size, h]
     add_sigmoid = layers.transpose(
-        add_sigmoid, perm=[1, 0, 2])  #[batch_size, -1(seq_max), h]
+        add_sigmoid, perm=[1, 0, 2])  #[batch_size, seq_max, h]

     weight = layers.fc(
         input=add_sigmoid,
@@ -168,10 +167,10 @@ def network(batch_size, items_num, hidden_size, step):
         bias_attr=False,
         param_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Uniform(
-                low=-stdv, high=stdv)))  #[batch_size, -1, 1]
+                low=-stdv, high=stdv)))  #[batch_size, seq_max, 1]
     weight *= mask
-    weight_mask = layers.elementwise_mul(seq, weight, axis=0)
-    global_attention = layers.reduce_sum(weight_mask, dim=1)
+    weight_mask = layers.elementwise_mul(seq, weight, axis=0)  #[batch_size, seq_max, h]
+    global_attention = layers.reduce_sum(weight_mask, dim=1)  #[batch_size, h]

     final_attention = layers.concat(
         [global_attention, last], axis=1)  #[batch_size, 2*h]
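The reshape calls above are what let network() drop its batch_size argument: the static shape=[-1, 1, hidden_size] is only a compile-time placeholder, and actual_shape supplies the real target shape as a tensor computed at run time. Here is a minimal sketch of the same trick, assuming the fluid 1.x API; seq_index, flat, and target_shape are illustrative names, not the commit's own variables.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

hidden_size = 100
# Fed as [batch_size, seq_max] at run time; only a trailing dim is declared.
seq_index = layers.data(name="seq_index", shape=[1], dtype="int32")
# Flattened states, [batch_size * seq_max, hidden_size].
flat = layers.data(name="flat", shape=[hidden_size], dtype="float32")

# Build the target shape [batch_size, seq_max, hidden_size] as a tensor:
# [0, 0, hidden_size - 1] plus the shape of seq_index unsqueezed to
# [batch_size, seq_max, 1] gives exactly [batch_size, seq_max, hidden_size].
target_shape = layers.assign(np.array([0, 0, hidden_size - 1]).astype("int32"))
target_shape += layers.shape(layers.unsqueeze(seq_index, [2]))
target_shape.stop_gradient = True

# The static shape is a placeholder; actual_shape wins at run time.
seq = layers.reshape(flat, shape=[-1, 1, hidden_size], actual_shape=target_shape)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
out, = exe.run(
    feed={"seq_index": np.zeros((2, 3), dtype="int32"),
          "flat": np.zeros((6, hidden_size), dtype="float32")},
    fetch_list=[seq.name])
print(out.shape)  # (2, 3, 100): batch_size and seq_max recovered at run time

In the commit itself, items_emb_shape plays the same role inside the propagation loop; it can reuse layers.shape(items_emb) directly because the embedding already has the desired three-dimensional shape.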
reader.py
@@ -98,17 +98,20 @@ class Data():
             cur_batch = cur_bg[i:i + batch_size]
             yield self.make_data(cur_batch, batch_size)

-        #deal with the remaining, discard at most batch_size data
-        if group_remain < batch_size:
+        #deal with the last batch group
+        if group_remain == 0:
             return
         remain_data = copy.deepcopy(self.input[-group_remain:])
         if train:
             remain_data = sorted(
                 remain_data, key=lambda x: len(x[0]), reverse=True)
-        for i in range(0, batch_group_size, batch_size):
-            if i + batch_size <= len(remain_data):
+        for i in range(0, group_remain, batch_size):
+            if i + batch_size <= group_remain:
                 cur_batch = remain_data[i:i + batch_size]
                 yield self.make_data(cur_batch, batch_size)
+            else:
+                cur_batch = remain_data[i:]
+                yield self.make_data(cur_batch, group_remain % batch_size)

     return _reader
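In plain Python, the reader change amounts to this: the old tail handling returned as soon as fewer than batch_size samples remained, discarding up to batch_size - 1 of them, while the new code keeps batching to the end and yields one final, smaller batch (hence "use all data" in the commit message). A standalone sketch of the new tail logic, with illustrative names:

def iter_batches(data, batch_size):
    """Yield (batch, effective_batch_size); the last batch may be smaller."""
    n = len(data)
    for i in range(0, n, batch_size):
        if i + batch_size <= n:
            yield data[i:i + batch_size], batch_size
        else:
            yield data[i:], n % batch_size

# 7 samples with batch_size=3 -> sizes 3, 3, 1; nothing is discarded.
for batch, size in iter_batches(list(range(7)), 3):
    print(size, batch)

This is also why the network had to accept a variable batch size: the last batch of an epoch is no longer guaranteed to contain exactly batch_size sessions.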
train.py
@@ -71,7 +71,7 @@ def train():
     batch_size = args.batch_size
     items_num = reader.read_config(args.config_path)
-    loss, acc, py_reader, feed_datas = network.network(batch_size, items_num, args.hidden_size,
-                                                       args.step)
+    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size,
+                                                       args.step)
     data_reader = reader.Data(args.train_path, True)