未验证 提交 5312aaa1 编写于 作者: H hutuxian 提交者: GitHub

upgrade API for 1.7(din & sr-gnn) (#4301)

Upgrade API for 1.7
* PaddleRec/din
* PaddleRec/gnn
上级 68c7b046
...@@ -78,20 +78,21 @@ def infer(): ...@@ -78,20 +78,21 @@ def infer():
[inference_program, feed_target_names, [inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe) fetch_targets] = fluid.io.load_inference_model(model_path, exe)
feeder = fluid.DataFeeder( loader = fluid.io.DataLoader.from_generator(
feed_list=feed_target_names, place=place, program=inference_program) feed_list=[inference_program.block(0).var(e) for e in feed_target_names], capacity=10000, iterable=True)
loader.set_sample_list_generator(data_reader, places=fluid.cuda_places())
loss_sum = 0.0 loss_sum = 0.0
score = [] score = []
count = 0 count = 0
for data in data_reader(): for data in loader():
res = exe.run(inference_program, res = exe.run(inference_program,
feed=feeder.feed(data), feed=data,
fetch_list=fetch_targets) fetch_list=fetch_targets)
loss_sum += res[0] loss_sum += res[0]
label_data = list(np.array(data[0]["label"]))
for i in range(len(data)): for i in range(len(label_data)):
if data[i][4] > 0.5: if label_data[i] > 0.5:
score.append([0, 1, res[1][i]]) score.append([0, 1, res[1][i]])
else: else:
score.append([1, 0, res[1][i]]) score.append([1, 0, res[1][i]])
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import paddle.fluid as fluid import paddle.fluid as fluid
def din_attention(hist, target_expand, max_len, mask): def din_attention(hist, target_expand, mask):
"""activation weight""" """activation weight"""
hidden_size = hist.shape[-1] hidden_size = hist.shape[-1]
...@@ -45,9 +45,10 @@ def din_attention(hist, target_expand, max_len, mask): ...@@ -45,9 +45,10 @@ def din_attention(hist, target_expand, max_len, mask):
return out return out
def network(item_count, cat_count, max_len): def network(item_count, cat_count):
"""network definition""" """network definition"""
seq_len = -1
item_emb_size = 64 item_emb_size = 64
cat_emb_size = 64 cat_emb_size = 64
is_sparse = False is_sparse = False
...@@ -56,60 +57,60 @@ def network(item_count, cat_count, max_len): ...@@ -56,60 +57,60 @@ def network(item_count, cat_count, max_len):
item_emb_attr = fluid.ParamAttr(name="item_emb") item_emb_attr = fluid.ParamAttr(name="item_emb")
cat_emb_attr = fluid.ParamAttr(name="cat_emb") cat_emb_attr = fluid.ParamAttr(name="cat_emb")
hist_item_seq = fluid.layers.data( hist_item_seq = fluid.data(
name="hist_item_seq", shape=[max_len, 1], dtype="int64") name="hist_item_seq", shape=[None, seq_len], dtype="int64")
hist_cat_seq = fluid.layers.data( hist_cat_seq = fluid.data(
name="hist_cat_seq", shape=[max_len, 1], dtype="int64") name="hist_cat_seq", shape=[None, seq_len], dtype="int64")
target_item = fluid.layers.data( target_item = fluid.data(
name="target_item", shape=[1], dtype="int64") name="target_item", shape=[None], dtype="int64")
target_cat = fluid.layers.data( target_cat = fluid.data(
name="target_cat", shape=[1], dtype="int64") name="target_cat", shape=[None], dtype="int64")
label = fluid.layers.data( label = fluid.data(
name="label", shape=[1], dtype="float32") name="label", shape=[None, 1], dtype="float32")
mask = fluid.layers.data( mask = fluid.data(
name="mask", shape=[max_len, 1], dtype="float32") name="mask", shape=[None, seq_len, 1], dtype="float32")
target_item_seq = fluid.layers.data( target_item_seq = fluid.data(
name="target_item_seq", shape=[max_len, 1], dtype="int64") name="target_item_seq", shape=[None, seq_len], dtype="int64")
target_cat_seq = fluid.layers.data( target_cat_seq = fluid.data(
name="target_cat_seq", shape=[max_len, 1], dtype="int64", lod_level=0) name="target_cat_seq", shape=[None, seq_len], dtype="int64")
hist_item_emb = fluid.layers.embedding( hist_item_emb = fluid.embedding(
input=hist_item_seq, input=hist_item_seq,
size=[item_count, item_emb_size], size=[item_count, item_emb_size],
param_attr=item_emb_attr, param_attr=item_emb_attr,
is_sparse=is_sparse) is_sparse=is_sparse)
hist_cat_emb = fluid.layers.embedding( hist_cat_emb = fluid.embedding(
input=hist_cat_seq, input=hist_cat_seq,
size=[cat_count, cat_emb_size], size=[cat_count, cat_emb_size],
param_attr=cat_emb_attr, param_attr=cat_emb_attr,
is_sparse=is_sparse) is_sparse=is_sparse)
target_item_emb = fluid.layers.embedding( target_item_emb = fluid.embedding(
input=target_item, input=target_item,
size=[item_count, item_emb_size], size=[item_count, item_emb_size],
param_attr=item_emb_attr, param_attr=item_emb_attr,
is_sparse=is_sparse) is_sparse=is_sparse)
target_cat_emb = fluid.layers.embedding( target_cat_emb = fluid.embedding(
input=target_cat, input=target_cat,
size=[cat_count, cat_emb_size], size=[cat_count, cat_emb_size],
param_attr=cat_emb_attr, param_attr=cat_emb_attr,
is_sparse=is_sparse) is_sparse=is_sparse)
target_item_seq_emb = fluid.layers.embedding( target_item_seq_emb = fluid.embedding(
input=target_item_seq, input=target_item_seq,
size=[item_count, item_emb_size], size=[item_count, item_emb_size],
param_attr=item_emb_attr, param_attr=item_emb_attr,
is_sparse=is_sparse) is_sparse=is_sparse)
target_cat_seq_emb = fluid.layers.embedding( target_cat_seq_emb = fluid.embedding(
input=target_cat_seq, input=target_cat_seq,
size=[cat_count, cat_emb_size], size=[cat_count, cat_emb_size],
param_attr=cat_emb_attr, param_attr=cat_emb_attr,
is_sparse=is_sparse) is_sparse=is_sparse)
item_b = fluid.layers.embedding( item_b = fluid.embedding(
input=target_item, input=target_item,
size=[item_count, 1], size=[item_count, 1],
param_attr=fluid.initializer.Constant(value=0.0)) param_attr=fluid.initializer.Constant(value=0.0))
...@@ -120,7 +121,7 @@ def network(item_count, cat_count, max_len): ...@@ -120,7 +121,7 @@ def network(item_count, cat_count, max_len):
target_concat = fluid.layers.concat( target_concat = fluid.layers.concat(
[target_item_emb, target_cat_emb], axis=1) [target_item_emb, target_cat_emb], axis=1)
out = din_attention(hist_seq_concat, target_seq_concat, max_len, mask) out = din_attention(hist_seq_concat, target_seq_concat, mask)
out_fc = fluid.layers.fc(name="out_fc", out_fc = fluid.layers.fc(name="out_fc",
input=out, input=out,
size=item_emb_size + cat_emb_size, size=item_emb_size + cat_emb_size,
...@@ -137,4 +138,6 @@ def network(item_count, cat_count, max_len): ...@@ -137,4 +138,6 @@ def network(item_count, cat_count, max_len):
loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logit, label=label) loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logit, label=label)
avg_loss = fluid.layers.mean(loss) avg_loss = fluid.layers.mean(loss)
return avg_loss, fluid.layers.sigmoid(logit) return avg_loss, fluid.layers.sigmoid(logit), \
[hist_item_seq, hist_cat_seq, target_item, \
target_cat, label, mask, target_item_seq, target_cat_seq]
...@@ -20,7 +20,7 @@ import pickle ...@@ -20,7 +20,7 @@ import pickle
def pad_batch_data(input, max_len): def pad_batch_data(input, max_len):
res = np.array([x + [0] * (max_len - len(x)) for x in input]) res = np.array([x + [0] * (max_len - len(x)) for x in input])
res = res.astype("int64").reshape([-1, max_len, 1]) res = res.astype("int64").reshape([-1, max_len])
return res return res
...@@ -34,10 +34,10 @@ def make_data(b): ...@@ -34,10 +34,10 @@ def make_data(b):
[-1, max_len, 1]) [-1, max_len, 1])
target_item_seq = np.array( target_item_seq = np.array(
[[x[2]] * max_len for x in b]).astype("int64").reshape( [[x[2]] * max_len for x in b]).astype("int64").reshape(
[-1, max_len, 1]) [-1, max_len])
target_cat_seq = np.array( target_cat_seq = np.array(
[[x[3]] * max_len for x in b]).astype("int64").reshape( [[x[3]] * max_len for x in b]).astype("int64").reshape(
[-1, max_len, 1]) [-1, max_len])
res = [] res = []
for i in range(len(b)): for i in range(len(b)):
res.append([ res.append([
......
...@@ -78,7 +78,7 @@ def train(): ...@@ -78,7 +78,7 @@ def train():
args.num_devices) args.num_devices)
logger.info("reading data completes") logger.info("reading data completes")
avg_cost, pred = network.network(item_count, cat_count, max_len) avg_cost, pred, feed_list = network.network(item_count, cat_count)
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm( fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=5.0)) clip_norm=5.0))
base_lr = args.base_lr base_lr = args.base_lr
...@@ -94,12 +94,9 @@ def train(): ...@@ -94,12 +94,9 @@ def train():
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
feeder = fluid.DataFeeder( loader = fluid.io.DataLoader.from_generator(
feed_list=[ feed_list=feed_list, capacity=10000, iterable=True)
"hist_item_seq", "hist_cat_seq", "target_item", "target_cat", loader.set_sample_list_generator(data_reader, places=fluid.cuda_places())
"label", "mask", "target_item_seq", "target_cat_seq"
],
place=place)
if use_parallel: if use_parallel:
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
use_cuda=use_cuda, loss_name=avg_cost.name) use_cuda=use_cuda, loss_name=avg_cost.name)
...@@ -117,9 +114,9 @@ def train(): ...@@ -117,9 +114,9 @@ def train():
loss_sum = 0.0 loss_sum = 0.0
for id in range(epoch_num): for id in range(epoch_num):
epoch = id + 1 epoch = id + 1
for data in data_reader(): for data in loader():
global_step += 1 global_step += 1
results = train_exe.run(feed=feeder.feed(data), results = train_exe.run(feed=data,
fetch_list=[avg_cost.name, pred.name], fetch_list=[avg_cost.name, pred.name],
return_numpy=True) return_numpy=True)
loss_sum += results[0].mean() loss_sum += results[0].mean()
......
...@@ -55,7 +55,7 @@ def infer(args): ...@@ -55,7 +55,7 @@ def infer(args):
test_data = reader.Data(args.test_path, False) test_data = reader.Data(args.test_path, False)
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, args.step) loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, args.step, batch_size)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
infer_program = fluid.default_main_program().clone(for_test=True) infer_program = fluid.default_main_program().clone(for_test=True)
...@@ -70,7 +70,7 @@ def infer(args): ...@@ -70,7 +70,7 @@ def infer(args):
loss_sum = 0.0 loss_sum = 0.0
acc_sum = 0.0 acc_sum = 0.0
count = 0 count = 0
py_reader.decorate_paddle_reader(test_data.reader(batch_size, batch_size*20, False)) py_reader.set_sample_list_generator(test_data.reader(batch_size, batch_size*20, False))
py_reader.start() py_reader.start()
try: try:
while True: while True:
......
...@@ -19,57 +19,53 @@ import paddle.fluid as fluid ...@@ -19,57 +19,53 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
def network(items_num, hidden_size, step): def network(items_num, hidden_size, step, bs):
stdv = 1.0 / math.sqrt(hidden_size) stdv = 1.0 / math.sqrt(hidden_size)
items = layers.data( items = fluid.data(
name="items", name="items",
shape=[1, 1], shape=[bs, -1],
dtype="int64") #[batch_size, uniq_max, 1] dtype="int64") #[batch_size, uniq_max]
seq_index = layers.data( seq_index = fluid.data(
name="seq_index", name="seq_index",
shape=[1], shape=[bs, -1, 2],
dtype="int32") #[batch_size, seq_max] dtype="int32") #[batch_size, seq_max, 2]
last_index = layers.data( last_index = fluid.data(
name="last_index", name="last_index",
shape=[1], shape=[bs, 2],
dtype="int32") #[batch_size, 1] dtype="int32") #[batch_size, 2]
adj_in = layers.data( adj_in = fluid.data(
name="adj_in", name="adj_in",
shape=[1,1], shape=[bs, -1, -1],
dtype="float32") #[batch_size, seq_max, seq_max] dtype="float32") #[batch_size, seq_max, seq_max]
adj_out = layers.data( adj_out = fluid.data(
name="adj_out", name="adj_out",
shape=[1,1], shape=[bs, -1, -1],
dtype="float32") #[batch_size, seq_max, seq_max] dtype="float32") #[batch_size, seq_max, seq_max]
mask = layers.data( mask = fluid.data(
name="mask", name="mask",
shape=[1, 1], shape=[bs, -1, 1],
dtype="float32") #[batch_size, seq_max, 1] dtype="float32") #[batch_size, seq_max, 1]
label = layers.data( label = fluid.data(
name="label", name="label",
shape=[1], shape=[bs, 1],
dtype="int64") #[batch_size, 1] dtype="int64") #[batch_size, 1]
datas = [items, seq_index, last_index, adj_in, adj_out, mask, label] datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
py_reader = fluid.layers.create_py_reader_by_data( py_reader = fluid.io.DataLoader.from_generator(capacity=256, feed_list=datas, iterable=False)
capacity=256, feed_list=datas, name='py_reader', use_double_buffer=True) feed_datas = datas
feed_datas = fluid.layers.read_file(py_reader)
items, seq_index, last_index, adj_in, adj_out, mask, label = feed_datas
items_emb = layers.embedding( items_emb = fluid.embedding(
input=items, input=items,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="emb", name="emb",
initializer=fluid.initializer.Uniform( initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)), low=-stdv, high=stdv)),
size=[items_num, hidden_size]) #[batch_size, uniq_max, h] size=[items_num, hidden_size]) #[batch_size, uniq_max, h]
items_emb_shape = layers.shape(items_emb)
pre_state = items_emb pre_state = items_emb
for i in range(step): for i in range(step):
pre_state = layers.reshape( pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
x=pre_state, shape=[-1, 1, hidden_size], actual_shape=items_emb_shape)
state_in = layers.fc( state_in = layers.fc(
input=pre_state, input=pre_state,
name="state_in", name="state_in",
...@@ -104,24 +100,12 @@ def network(items_num, hidden_size, step): ...@@ -104,24 +100,12 @@ def network(items_num, hidden_size, step):
bias_attr=False) bias_attr=False)
pre_state, _, _ = fluid.layers.gru_unit( pre_state, _, _ = fluid.layers.gru_unit(
input=gru_fc, input=gru_fc,
hidden=layers.reshape( hidden=layers.reshape(x=pre_state, shape=[-1, hidden_size]),
x=pre_state, shape=[-1, hidden_size]),
size=3 * hidden_size) size=3 * hidden_size)
final_state = pre_state #[batch_size * uniq_max, h] final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
seq = layers.gather_nd(final_state, seq_index)
seq_origin_shape = layers.assign(np.array([0,0,hidden_size-1]).astype("int32")) last = layers.gather_nd(final_state, last_index)
seq_origin_shape += layers.shape(layers.unsqueeze(seq_index,[2])) #value: [batch_size, seq_max, h]
seq_origin_shape.stop_gradient = True
seq_index = layers.reshape(seq_index, shape=[-1])
seq = layers.gather(final_state, seq_index) #[batch_size * seq_max, h]
last = layers.gather(final_state, last_index) #[batch_size, h]
seq = layers.reshape(
seq, shape=[-1, 1, hidden_size], actual_shape=seq_origin_shape) #[batch_size, seq_max, h]
last = layers.reshape(
last, shape=[-1, hidden_size]) #[batch_size, h]
seq_fc = layers.fc( seq_fc = layers.fc(
input=seq, input=seq,
...@@ -184,13 +168,13 @@ def network(items_num, hidden_size, step): ...@@ -184,13 +168,13 @@ def network(items_num, hidden_size, step):
low=-stdv, high=stdv))) #[batch_size, h] low=-stdv, high=stdv))) #[batch_size, h]
all_vocab = layers.create_global_var( all_vocab = layers.create_global_var(
shape=[items_num - 1, 1], shape=[items_num - 1],
value=0, value=0,
dtype="int64", dtype="int64",
persistable=True, persistable=True,
name="all_vocab") name="all_vocab")
all_emb = layers.embedding( all_emb = fluid.embedding(
input=all_vocab, input=all_vocab,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="emb", name="emb",
......
...@@ -64,19 +64,19 @@ class Data(): ...@@ -64,19 +64,19 @@ class Data():
adj_out.append(np.divide(adj.transpose(), u_deg_out).transpose()) adj_out.append(np.divide(adj.transpose(), u_deg_out).transpose())
seq_index.append( seq_index.append(
[np.where(node == i)[0][0] + id * max_uniq_len for i in e[0]]) [[id, np.where(node == i)[0][0]] for i in e[0]])
last_index.append( last_index.append(
np.where(node == e[0][last_id[id]])[0][0] + id * max_uniq_len) [id, np.where(node == e[0][last_id[id]])[0][0]])
label.append(e[1] - 1) label.append(e[1] - 1)
mask.append([[1] * (last_id[id] + 1) + [0] * mask.append([[1] * (last_id[id] + 1) + [0] *
(max_seq_len - last_id[id] - 1)]) (max_seq_len - last_id[id] - 1)])
id += 1 id += 1
items = np.array(items).astype("int64").reshape((batch_size, -1, 1)) items = np.array(items).astype("int64").reshape((batch_size, -1))
seq_index = np.array(seq_index).astype("int32").reshape( seq_index = np.array(seq_index).astype("int32").reshape(
(batch_size, -1)) (batch_size, -1, 2))
last_index = np.array(last_index).astype("int32").reshape( last_index = np.array(last_index).astype("int32").reshape(
(batch_size)) (batch_size, 2))
adj_in = np.array(adj_in).astype("float32").reshape( adj_in = np.array(adj_in).astype("float32").reshape(
(batch_size, max_uniq_len, max_uniq_len)) (batch_size, max_uniq_len, max_uniq_len))
adj_out = np.array(adj_out).astype("float32").reshape( adj_out = np.array(adj_out).astype("float32").reshape(
...@@ -110,8 +110,10 @@ class Data(): ...@@ -110,8 +110,10 @@ class Data():
cur_batch = remain_data[i:i + batch_size] cur_batch = remain_data[i:i + batch_size]
yield self.make_data(cur_batch, batch_size) yield self.make_data(cur_batch, batch_size)
else: else:
cur_batch = remain_data[i:] # Due to fixed batch_size, discard the remaining ins
yield self.make_data(cur_batch, group_remain % batch_size) return
#cur_batch = remain_data[i:]
#yield self.make_data(cur_batch, group_remain % batch_size)
return _reader return _reader
......
...@@ -72,7 +72,7 @@ def train(): ...@@ -72,7 +72,7 @@ def train():
batch_size = args.batch_size batch_size = args.batch_size
items_num = reader.read_config(args.config_path) items_num = reader.read_config(args.config_path)
loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size,
args.step) args.step, batch_size)
data_reader = reader.Data(args.train_path, True) data_reader = reader.Data(args.train_path, True)
logger.info("load data complete") logger.info("load data complete")
...@@ -96,7 +96,7 @@ def train(): ...@@ -96,7 +96,7 @@ def train():
all_vocab = fluid.global_scope().var("all_vocab").get_tensor() all_vocab = fluid.global_scope().var("all_vocab").get_tensor()
all_vocab.set( all_vocab.set(
np.arange(1, items_num).astype("int64").reshape((-1, 1)), place) np.arange(1, items_num).astype("int64").reshape((-1)), place)
feed_list = [e.name for e in feed_datas] feed_list = [e.name for e in feed_datas]
...@@ -115,7 +115,8 @@ def train(): ...@@ -115,7 +115,8 @@ def train():
acc_sum = 0.0 acc_sum = 0.0
global_step = 0 global_step = 0
PRINT_STEP = 500 PRINT_STEP = 500
py_reader.decorate_paddle_reader(data_reader.reader(batch_size, batch_size * 20, True)) #py_reader.decorate_paddle_reader(data_reader.reader(batch_size, batch_size * 20, True))
py_reader.set_sample_list_generator(data_reader.reader(batch_size, batch_size * 20, True))
for i in range(args.epoch_num): for i in range(args.epoch_num):
epoch_sum = [] epoch_sum = []
py_reader.start() py_reader.start()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册