Unverified commit ce853f67, authored by H hutuxian, committed by GitHub

SR-GNN: some optimization (#2676)

use all data
network:
    support variable batch_size
    add some shape comment
infer:
    use pyreader
Parent commit: 41b772c8
......@@ -19,6 +19,7 @@ import os
import paddle
import paddle.fluid as fluid
import reader
import network
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
......@@ -31,6 +32,8 @@ def parse_args():
'--model_path', type=str, default='./saved_model/', help="path of model parameters")
parser.add_argument(
'--test_path', type=str, default='./data/diginetica/test.txt', help='dir of test file')
parser.add_argument(
'--config_path', type=str, default='./data/diginetica/config.txt', help='dir of config')
parser.add_argument(
'--use_cuda', type=int, default=1, help='whether to use gpu')
parser.add_argument(
......@@ -39,40 +42,51 @@ def parse_args():
'--start_index', type=int, default='0', help='start index')
parser.add_argument(
'--last_index', type=int, default='10', help='end index')
parser.add_argument(
'--hidden_size', type=int, default=100, help='hidden state size')
parser.add_argument(
'--step', type=int, default=1, help='gnn propogation steps')
return parser.parse_args()
def infer(epoch_num):
args = parse_args()
def infer(args):
batch_size = args.batch_size
items_num = reader.read_config(args.config_path)
test_data = reader.Data(args.test_path, False)
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, args.step)
exe.run(fluid.default_startup_program())
infer_program = fluid.default_main_program().clone(for_test=True)
model_path = args.model_path + "epoch_" + str(epoch_num)
try:
[infer_program, feed_names, fetch_targets] = fluid.io.load_inference_model(
model_path, exe)
feeder = fluid.DataFeeder(
feed_list=feed_names, place=place, program=infer_program)
for epoch_num in range(args.start_index, args.last_index + 1):
model_path = args.model_path + "epoch_" + str(epoch_num)
try:
if not os.path.exists(model_path):
raise ValueError()
fluid.io.load_persistables(executor=exe, dirname=model_path,
main_program=infer_program)
loss_sum = 0.0
acc_sum = 0.0
count = 0
for data in test_data.reader(batch_size, batch_size, False)():
res = exe.run(infer_program,
feed=feeder.feed(data),
fetch_list=fetch_targets)
loss_sum += res[0]
acc_sum += res[1]
count += 1
logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
(loss_sum / count, acc_sum / count))
except ValueError as e:
logger.info("TEST --> error: there is no model in " + model_path)
loss_sum = 0.0
acc_sum = 0.0
count = 0
py_reader.decorate_paddle_reader(test_data.reader(batch_size, batch_size*20, False))
py_reader.start()
try:
while True:
res = exe.run(infer_program,
fetch_list=[loss.name, acc.name], use_program_cache=True)
loss_sum += res[0]
acc_sum += res[1]
count += 1
except fluid.core.EOFException:
py_reader.reset()
logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
(loss_sum / count, acc_sum / count))
except ValueError as e:
logger.info("TEST --> error: there is no model in " + model_path)
if __name__ == "__main__":
args = parse_args()
for index in range(args.start_index, args.last_index + 1):
infer(index)
infer(args)
......@@ -19,44 +19,37 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers
def network(batch_size, items_num, hidden_size, step):
def network(items_num, hidden_size, step):
stdv = 1.0 / math.sqrt(hidden_size)
items = layers.data(
name="items",
shape=[batch_size, items_num, 1],
dtype="int64",
append_batch_size=False) #[bs, uniq_max, 1]
shape=[1, 1],
dtype="int64") #[batch_size, uniq_max, 1]
seq_index = layers.data(
name="seq_index",
shape=[batch_size, items_num],
dtype="int32",
append_batch_size=False) #[-1(seq_max)*batch_size, 1]
shape=[1],
dtype="int32") #[batch_size, seq_max]
last_index = layers.data(
name="last_index",
shape=[batch_size],
dtype="int32",
append_batch_size=False) #[batch_size, 1]
shape=[1],
dtype="int32") #[batch_size, 1]
adj_in = layers.data(
name="adj_in",
shape=[batch_size, items_num, items_num],
dtype="float32",
append_batch_size=False)
shape=[1,1],
dtype="float32") #[batch_size, seq_max, seq_max]
adj_out = layers.data(
name="adj_out",
shape=[batch_size, items_num, items_num],
dtype="float32",
append_batch_size=False)
shape=[1,1],
dtype="float32") #[batch_size, seq_max, seq_max]
mask = layers.data(
name="mask",
shape=[batch_size, -1, 1],
dtype="float32",
append_batch_size=False)
shape=[1, 1],
dtype="float32") #[batch_size, seq_max, 1]
label = layers.data(
name="label",
shape=[batch_size, 1],
dtype="int64",
append_batch_size=False)
shape=[1],
dtype="int64") #[batch_size, 1]
datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
py_reader = fluid.layers.create_py_reader_by_data(
......@@ -71,11 +64,12 @@ def network(batch_size, items_num, hidden_size, step):
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)),
size=[items_num, hidden_size]) #[batch_size, uniq_max, h]
items_emb_shape = layers.shape(items_emb)
pre_state = items_emb
for i in range(step):
pre_state = layers.reshape(
x=pre_state, shape=[batch_size, -1, hidden_size])
x=pre_state, shape=[-1, 1, hidden_size], actual_shape=items_emb_shape)
state_in = layers.fc(
input=pre_state,
name="state_in",
......@@ -114,15 +108,20 @@ def network(batch_size, items_num, hidden_size, step):
x=pre_state, shape=[-1, hidden_size]),
size=3 * hidden_size)
final_state = pre_state
final_state = pre_state #[batch_size * uniq_max, h]
seq_origin_shape = layers.assign(np.array([0,0,hidden_size-1]).astype("int32"))
seq_origin_shape += layers.shape(layers.unsqueeze(seq_index,[2])) #value: [batch_size, seq_max, h]
seq_origin_shape.stop_gradient = True
seq_index = layers.reshape(seq_index, shape=[-1])
seq = layers.gather(final_state, seq_index) #[batch_size*-1(seq_max), h]
seq = layers.gather(final_state, seq_index) #[batch_size * seq_max, h]
last = layers.gather(final_state, last_index) #[batch_size, h]
seq = layers.reshape(
seq, shape=[batch_size, -1, hidden_size]) #[batch_size, -1(seq_max), h]
seq, shape=[-1, 1, hidden_size], actual_shape=seq_origin_shape) #[batch_size, seq_max, h]
last = layers.reshape(
last, shape=[batch_size, hidden_size]) #[batch_size, h]
last, shape=[-1, hidden_size]) #[batch_size, h]
seq_fc = layers.fc(
input=seq,
......@@ -133,7 +132,7 @@ def network(batch_size, items_num, hidden_size, step):
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, -1(seq_max), h]
low=-stdv, high=stdv))) #[batch_size, seq_max, h]
last_fc = layers.fc(
input=last,
name="last_fc",
......@@ -146,18 +145,18 @@ def network(batch_size, items_num, hidden_size, step):
low=-stdv, high=stdv))) #[bathc_size, h]
seq_fc_t = layers.transpose(
seq_fc, perm=[1, 0, 2]) #[-1(seq_max), batch_size, h]
seq_fc, perm=[1, 0, 2]) #[seq_max, batch_size, h]
add = layers.elementwise_add(
seq_fc_t, last_fc) #[-1(seq_max), batch_size, h]
seq_fc_t, last_fc) #[seq_max, batch_size, h]
b = layers.create_parameter(
shape=[hidden_size],
dtype='float32',
default_initializer=fluid.initializer.Constant(value=0.0)) #[h]
add = layers.elementwise_add(add, b) #[-1(seq_max), batch_size, h]
add = layers.elementwise_add(add, b) #[seq_max, batch_size, h]
add_sigmoid = layers.sigmoid(add) #[-1(seq_max), batch_size, h]
add_sigmoid = layers.sigmoid(add) #[seq_max, batch_size, h]
add_sigmoid = layers.transpose(
add_sigmoid, perm=[1, 0, 2]) #[batch_size, -1(seq_max), h]
add_sigmoid, perm=[1, 0, 2]) #[batch_size, seq_max, h]
weight = layers.fc(
input=add_sigmoid,
......@@ -168,10 +167,10 @@ def network(batch_size, items_num, hidden_size, step):
bias_attr=False,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, -1, 1]
low=-stdv, high=stdv))) #[batch_size, seq_max, 1]
weight *= mask
weight_mask = layers.elementwise_mul(seq, weight, axis=0)
global_attention = layers.reduce_sum(weight_mask, dim=1)
weight_mask = layers.elementwise_mul(seq, weight, axis=0) #[batch_size, seq_max, h]
global_attention = layers.reduce_sum(weight_mask, dim=1) #[batch_size, h]
final_attention = layers.concat(
[global_attention, last], axis=1) #[batch_size, 2*h]
......
......@@ -98,17 +98,20 @@ class Data():
cur_batch = cur_bg[i:i + batch_size]
yield self.make_data(cur_batch, batch_size)
#deal with the remaining, discard at most batch_size data
if group_remain < batch_size:
#deal with the last batch group
if group_remain == 0:
return
remain_data = copy.deepcopy(self.input[-group_remain:])
if train:
remain_data = sorted(
remain_data, key=lambda x: len(x[0]), reverse=True)
for i in range(0, batch_group_size, batch_size):
if i + batch_size <= len(remain_data):
for i in range(0, group_remain, batch_size):
if i + batch_size <= group_remain:
cur_batch = remain_data[i:i + batch_size]
yield self.make_data(cur_batch, batch_size)
else:
cur_batch = remain_data[i:]
yield self.make_data(cur_batch, group_remain % batch_size)
return _reader
......
......@@ -71,7 +71,7 @@ def train():
batch_size = args.batch_size
items_num = reader.read_config(args.config_path)
loss, acc, py_reader, feed_datas = network.network(batch_size, items_num, args.hidden_size,
loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size,
args.step)
data_reader = reader.Data(args.train_path, True)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this message first!
To comment, please register.