diff --git a/examples/erniesage/config/erniesage_v2_gpu.yaml b/examples/erniesage/config/erniesage_v2_gpu.yaml
index a346808c8c1f7a5fe36544c6d2dc06eda98e0ed8..f135863f30e90ccd7bec6d0cf98d3917b41d44df 100644
--- a/examples/erniesage/config/erniesage_v2_gpu.yaml
+++ b/examples/erniesage/config/erniesage_v2_gpu.yaml
@@ -6,9 +6,9 @@
 optimizer_type: "adam"
 lr: 0.00005
 batch_size: 32
 CPU_NUM: 10
-epoch: 20
-log_per_step: 1
-save_per_step: 100
+epoch: 3
+log_per_step: 10
+save_per_step: 1000
 output_path: "./output"
 ckpt_path: "./ernie_base_ckpt"
@@ -31,6 +31,7 @@ final_fc: true
 final_l2_norm: true
 loss_type: "hinge"
 margin: 0.3
+neg_type: "random_neg"
 
 # infer config ------
 infer_model: "./output/last"
diff --git a/examples/erniesage/dataset/graph_reader.py b/examples/erniesage/dataset/graph_reader.py
index 99d029a98a3ac0f482cdf5d4cd6591967ce86495..8727fcb6ad7018c57252529c6d4e44bd50ea4085 100644
--- a/examples/erniesage/dataset/graph_reader.py
+++ b/examples/erniesage/dataset/graph_reader.py
@@ -86,6 +86,7 @@ class GraphGenerator(BaseDataGenerator):
         nodes = np.unique(np.concatenate([batch_src, batch_dst, batch_neg], 0))
         subgraphs = graphsage_sample(self.graph, nodes, self.samples, ignore_edges=ignore_edges)
+        #subgraphs[0].reindex_to_parrent_nodes(subgraphs[0].nodes)
 
         feed_dict = {}
         for i in range(self.num_layers):
             feed_dict.update(self.graph_wrappers[i].to_feed(subgraphs[i]))
@@ -97,7 +98,7 @@ class GraphGenerator(BaseDataGenerator):
 
         feed_dict["user_index"] = np.array(sub_src_idx, dtype="int64")
         feed_dict["item_index"] = np.array(sub_dst_idx, dtype="int64")
-        #feed_dict["neg_item_index"] = np.array(sub_neg_idx, dtype="int64")
+        feed_dict["neg_item_index"] = np.array(sub_neg_idx, dtype="int64")
         feed_dict["term_ids"] = self.term_ids[subgraphs[0].node_feat["index"]]
         return feed_dict
 
diff --git a/examples/erniesage/infer.py b/examples/erniesage/infer.py
index 20735ddc487e216c309f9bcfccc8a8ed3a602873..26ce0c456504dfae2f9326e2c49bbc709cd9d328 100644
--- a/examples/erniesage/infer.py
+++ b/examples/erniesage/infer.py
@@ -72,7 +72,7 @@ def run_predict(py_reader,
 
     for batch_feed_dict in py_reader():
         batch += 1
-        batch_usr_feat, batch_ad_feat, batch_src_real_index = exe.run(
+        batch_usr_feat, batch_ad_feat, _, batch_src_real_index = exe.run(
             program,
             feed=batch_feed_dict,
             fetch_list=model_dict.outputs)
diff --git a/examples/erniesage/learner.py b/examples/erniesage/learner.py
index 7316dcf42cb6602809c7682c13421de13c499243..b874d59e4c0e1fc9f5dbbce67ff759e0560bb572 100644
--- a/examples/erniesage/learner.py
+++ b/examples/erniesage/learner.py
@@ -193,6 +193,7 @@ class CollectiveLearner(Learner):
 
     def optimize(self, loss, optimizer_type, lr):
         optimizer = F.optimizer.Adam(learning_rate=lr)
         dist_strategy = DistributedStrategy()
+        dist_strategy.enable_sequential_execution = True
         optimizer = cfleet.distributed_optimizer(optimizer, strategy=dist_strategy)
         _, param_grads = optimizer.minimize(loss, F.default_startup_program())
diff --git a/examples/erniesage/local_run.sh b/examples/erniesage/local_run.sh
index d76d18c14057ddcb06b7d8b5aa5edac3357587c5..df850f94a764dc35e250fcbb5a3685737d1d0fc9 100644
--- a/examples/erniesage/local_run.sh
+++ b/examples/erniesage/local_run.sh
@@ -50,7 +50,6 @@ transpiler_local_train(){
 }
 
 collective_local_train(){
-    export PATH=./python27-gcc482-gpu/bin/:$PATH
     echo `which python`
     python -m paddle.distributed.launch train.py --conf $config
     python -m paddle.distributed.launch infer.py --conf $config
@@ -58,8 +57,7 @@
 
 eval $(parse_yaml $config)
 
-python3 ./preprocessing/dump_graph.py -i $input_data -o $graph_path --encoding $encoding \
-    -l $max_seqlen --vocab_file $ernie_vocab_file
+python ./preprocessing/dump_graph.py -i $input_data -o $graph_path --encoding $encoding -l $max_seqlen --vocab_file $ernie_vocab_file
 
 if [[ $learner_type == "cpu" ]];then
     transpiler_local_train
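Note: with neg_type set to "random_neg" and the reader now feeding neg_item_index, the hinge loss scores explicitly sampled negatives instead of reusing in-batch items. A minimal numpy sketch of the scoring this enables (names are illustrative, not repo code):

    import numpy as np

    def hinge_loss(user, item, neg_item, margin=0.3):
        # user, item, neg_item: [B, D] feature matrices
        pos = np.sum(user * item, -1, keepdims=True)  # [B, 1] positive scores
        neg = user @ neg_item.T                       # [B, B] scores vs. sampled negatives
        return np.mean(np.maximum(0.0, neg - pos + margin))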
diff --git a/examples/erniesage/models/base.py b/examples/erniesage/models/base.py
index 7df76fff9917d6ca22c0e8a98e5907209e954f88..f59f54b36d5727ce7acec6b1b782d0724e3b4be6 100644
--- a/examples/erniesage/models/base.py
+++ b/examples/erniesage/models/base.py
@@ -129,7 +129,9 @@ class BaseNet(object):
             "user_index", shape=[None], dtype="int64", append_batch_size=False)
         item_index = L.data(
             "item_index", shape=[None], dtype="int64", append_batch_size=False)
-        return [user_index, item_index]
+        neg_item_index = L.data(
+            "neg_item_index", shape=[None], dtype="int64", append_batch_size=False)
+        return [user_index, item_index, neg_item_index]
 
     def build_embedding(self, graph_wrappers, inputs=None):
         num_embed = int(np.load(os.path.join(self.config.graph_path, "num_nodes.npy")))
@@ -177,18 +179,58 @@ class BaseNet(object):
             outputs.append(src_real_index)
         return inputs, outputs
 
+def all_gather(X):
+    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
+    trainer_num = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
+    if trainer_num == 1:
+        copy_X = X * 1
+        copy_X.stop_gradient = True
+        return copy_X
+
+    Xs = []
+    for i in range(trainer_num):
+        copy_X = X * 1
+        copy_X = L.collective._broadcast(copy_X, i, True)
+        copy_X.stop_gradient = True
+        Xs.append(copy_X)
+
+    if len(Xs) > 1:
+        Xs = L.concat(Xs, 0)
+        Xs.stop_gradient = True
+    else:
+        Xs = Xs[0]
+    return Xs
+
 class BaseLoss(object):
     def __init__(self, config):
         self.config = config
 
     def __call__(self, outputs):
-        user_feat, item_feat = outputs[0], outputs[1]
+        user_feat, item_feat, neg_item_feat = outputs[0], outputs[1], outputs[2]
         loss_type = self.config.loss_type
+
+        if self.config.neg_type == "batch_neg":
+            neg_item_feat = item_feat
         # Calc Loss
         if self.config.loss_type == "hinge":
             pos = L.reduce_sum(user_feat * item_feat, -1, keep_dim=True) # [B, 1]
-            neg = L.matmul(user_feat, item_feat, transpose_y=True)  # [B, B]
+            neg = L.matmul(user_feat, neg_item_feat, transpose_y=True)  # [B, B]
             loss = L.reduce_mean(L.relu(neg - pos + self.config.margin))
+        elif self.config.loss_type == "all_hinge":
+            pos = L.reduce_sum(user_feat * item_feat, -1, keep_dim=True) # [B, 1]
+            all_pos = all_gather(pos) # [B * n, 1]
+            all_neg_item_feat = all_gather(neg_item_feat) # [B * n, D]
+            all_user_feat = all_gather(user_feat) # [B * n, D]
+
+            neg1 = L.matmul(user_feat, all_neg_item_feat, transpose_y=True) # [B, B * n]
+            neg2 = L.matmul(all_user_feat, neg_item_feat, transpose_y=True) # [B * n, B]
+
+            loss1 = L.reduce_mean(L.relu(neg1 - pos + self.config.margin))
+            loss2 = L.reduce_mean(L.relu(neg2 - all_pos + self.config.margin))
+
+            #loss = (loss1 + loss2) / 2
+            loss = loss1 + loss2
+
         elif self.config.loss_type == "softmax":
             pass
             # TODO
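Note: the all_hinge branch widens the negative pool across trainers. The gathered copies have stop_gradient set, so gradients only flow through the local tensors; both neg1 (local users against gathered items) and neg2 (gathered users against local items) are needed so every pair contributes a gradient on the trainer that owns it. A single-process toy sketch of the math, with numpy concatenation standing in for the collective all_gather (hypothetical helper, not repo code):

    import numpy as np

    def all_hinge(user_shards, item_shards, margin=0.3):
        # user_shards/item_shards: one [B, D] array per simulated trainer
        all_user = np.concatenate(user_shards, 0)   # stand-in for all_gather
        all_item = np.concatenate(item_shards, 0)
        all_pos = np.sum(all_user * all_item, -1, keepdims=True)  # [B * n, 1]
        losses = []
        for user, item in zip(user_shards, item_shards):
            pos = np.sum(user * item, -1, keepdims=True)  # [B, 1]
            neg1 = user @ all_item.T                      # [B, B * n]
            neg2 = all_user @ item.T                      # [B * n, B]
            loss1 = np.mean(np.maximum(0.0, neg1 - pos + margin))
            loss2 = np.mean(np.maximum(0.0, neg2 - all_pos + margin))
            losses.append(loss1 + loss2)
        return np.mean(losses)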
diff --git a/examples/erniesage/models/ernie_model/ernie.py b/examples/erniesage/models/ernie_model/ernie.py
index 3ba4f9bbd82f3889e66a8ff16aa7f1eee27abc79..06a75c82b95114aeaa00f0392c614b443cc83943 100644
--- a/examples/erniesage/models/ernie_model/ernie.py
+++ b/examples/erniesage/models/ernie_model/ernie.py
@@ -59,6 +59,8 @@ class ErnieModel(object):
     def __init__(self,
                  src_ids,
                  sentence_ids,
+                 position_ids=None,
+                 input_mask=None,
                  task_ids=None,
                  config=None,
                  weight_sharing=True,
@@ -66,8 +68,10 @@ class ErnieModel(object):
                  name=""):
         self._set_config(config, name, weight_sharing)
-        input_mask = self._build_input_mask(src_ids)
-        position_ids = self._build_position_ids(src_ids)
+        if position_ids is None:
+            position_ids = self._build_position_ids(src_ids)
+        if input_mask is None:
+            input_mask = self._build_input_mask(src_ids)
         self._build_model(src_ids, position_ids, sentence_ids, task_ids,
                           input_mask)
         self._debug_summary(input_mask)
 
diff --git a/examples/erniesage/models/erniesage_v2.py b/examples/erniesage/models/erniesage_v2.py
index e5c03c1fcc0076a8566035c5d523b2bfbf76eb7c..fec39f2f927be7f86dc88b6050ae7c45b096c822 100644
--- a/examples/erniesage/models/erniesage_v2.py
+++ b/examples/erniesage/models/erniesage_v2.py
@@ -3,8 +3,6 @@ import paddle.fluid as F
 import paddle.fluid.layers as L
 from models.base import BaseNet, BaseGNNModel
 from models.ernie_model.ernie import ErnieModel
-from models.ernie_model.ernie import ErnieGraphModel
-from models.ernie_model.ernie import ErnieConfig
 
 class ErnieSageV2(BaseNet):
 
@@ -16,19 +14,52 @@
         return inputs + [term_ids]
 
     def gnn_layer(self, gw, feature, hidden_size, act, initializer, learning_rate, name):
+        def build_position_ids(src_ids, dst_ids):
+            src_shape = L.shape(src_ids)
+            src_batch = src_shape[0]
+            src_seqlen = src_shape[1]
+            dst_seqlen = src_seqlen - 1 # without cls
+
+            src_position_ids = L.reshape(
+                L.range(
+                    0, src_seqlen, 1, dtype='int32'), [1, src_seqlen, 1],
+                inplace=True) # [1, slot_seqlen, 1]
+            src_position_ids = L.expand(src_position_ids, [src_batch, 1, 1]) # [B, slot_seqlen, 1]
+            zero = L.fill_constant([1], dtype='int64', value=0)
+            input_mask = L.cast(L.equal(src_ids, zero), "int32") # assume pad id == 0 [B, slot_seqlen, 1]
+            src_pad_len = L.reduce_sum(input_mask, 1) # [B, 1, 1]
+
+            dst_position_ids = L.reshape(
+                L.range(
+                    src_seqlen, src_seqlen + dst_seqlen, 1, dtype='int32'), [1, dst_seqlen, 1],
+                inplace=True) # [1, slot_seqlen, 1]
+            dst_position_ids = L.expand(dst_position_ids, [src_batch, 1, 1]) # [B, slot_seqlen, 1]
+            dst_position_ids = dst_position_ids - src_pad_len # [B, slot_seqlen, 1]
+
+            position_ids = L.concat([src_position_ids, dst_position_ids], 1)
+            position_ids = L.cast(position_ids, 'int64')
+            position_ids.stop_gradient = True
+            return position_ids
+
+
         def ernie_send(src_feat, dst_feat, edge_feat):
             """doc"""
+            # input_ids
             cls = L.fill_constant_batch_size_like(src_feat["term_ids"], [-1, 1, 1], "int64", 1)
             src_ids = L.concat([cls, src_feat["term_ids"]], 1)
             dst_ids = dst_feat["term_ids"]
+
+            # sent_ids
             sent_ids = L.concat([L.zeros_like(src_ids), L.ones_like(dst_ids)], 1)
             term_ids = L.concat([src_ids, dst_ids], 1)
+
+            # position_ids
+            position_ids = build_position_ids(src_ids, dst_ids)
+
             term_ids.stop_gradient = True
             sent_ids.stop_gradient = True
             ernie = ErnieModel(
-                term_ids, sent_ids,
+                term_ids, sent_ids, position_ids,
                 config=self.config.ernie_config)
             feature = ernie.get_pooled_output()
             return feature
diff --git a/examples/erniesage/models/erniesage_v3.py b/examples/erniesage/models/erniesage_v3.py
index 4fd9968a49213c4bc5fbb0deb5316e44bba30a12..1e805ba8e17d9633d0bb540f86557439d4fcdb06 100644
--- a/examples/erniesage/models/erniesage_v3.py
+++ b/examples/erniesage/models/erniesage_v3.py
@@ -18,7 +18,6 @@
 import paddle.fluid.layers as L
 from models.base import BaseNet, BaseGNNModel
 from models.ernie_model.ernie import ErnieModel
 from models.ernie_model.ernie import ErnieGraphModel
-from models.ernie_model.ernie import ErnieConfig
 
 from models.message_passing import copy_send
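Note: ernie_send runs ERNIE over [CLS] + src tokens + dst tokens as one sequence, so the dst segment's position ids must be shifted down by the number of src padding tokens, otherwise the model would see a positional gap at the segment boundary. A numpy sketch of the same indexing (illustrative only, assumes pad id 0 as the patch does):

    import numpy as np

    def build_position_ids(src_ids, dst_ids, pad_id=0):
        # src_ids: [B, S] including the CLS token; dst_ids: [B, T]
        batch, src_len = src_ids.shape
        dst_len = dst_ids.shape[1]
        src_pos = np.tile(np.arange(src_len), (batch, 1))          # [B, S]
        src_pad = np.sum(src_ids == pad_id, 1, keepdims=True)      # [B, 1] pad count per row
        dst_pos = np.arange(src_len, src_len + dst_len) - src_pad  # [B, T], shifted left
        return np.concatenate([src_pos, dst_pos], 1)               # [B, S + T]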
diff --git a/examples/erniesage/preprocessing/dump_graph.py b/examples/erniesage/preprocessing/dump_graph.py
index 06281456c29a11704e921a99cdd80d3dabfa0c3c..f2ec61e1b6eb5b3e5fa54ba0b15aecf94cae976a 100644
--- a/examples/erniesage/preprocessing/dump_graph.py
+++ b/examples/erniesage/preprocessing/dump_graph.py
@@ -53,6 +53,7 @@ def dump_graph(args):
     term_file = io.open(os.path.join(args.outpath, "terms.txt"), "w", encoding=args.encoding)
     terms = []
     count = 0
+    item_distribution = []
 
     with io.open(args.inpath, encoding=args.encoding) as f:
         edges = []
@@ -66,6 +67,7 @@
                     str2id[s] = count
                     count += 1
                     term_file.write(str(col_idx) + "\t" + col + "\n")
+                    item_distribution.append(0)
 
                 slots.append(str2id[s])
 
@@ -74,6 +76,7 @@
         neg_samples.append(slots[2:])
         edges.append((src, dst))
         edges.append((dst, src))
+        item_distribution[dst] += 1
 
     term_file.close()
     edges = np.array(edges, dtype="int64")
@@ -82,12 +85,14 @@
     log.info("building graph...")
     graph = pgl.graph.Graph(num_nodes=num_nodes, edges=edges)
     indegree = graph.indegree()
+    graph.indegree()
     graph.outdegree()
     graph.dump(args.outpath)
 
     # dump alias sample table
-    sqrt_indegree = np.sqrt(indegree)
-    distribution = 1. * sqrt_indegree / sqrt_indegree.sum()
+    item_distribution = np.array(item_distribution)
+    item_distribution = np.sqrt(item_distribution)
+    distribution = 1. * item_distribution / item_distribution.sum()
     alias, events = alias_sample_build_table(distribution)
     np.save(os.path.join(args.outpath, "alias.npy"), alias)
    np.save(os.path.join(args.outpath, "events.npy"), events)
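Note: the negative-sampling distribution now follows sqrt of item click frequency (how often a node appears as dst) rather than sqrt of indegree over all nodes, and the dumped alias table makes each draw O(1). A sketch of consuming the saved tables, assuming events.npy holds the keep-probability column of the standard alias method (hypothetical usage, not repo code):

    import numpy as np

    def alias_draw(alias, events, size):
        # one uniform column pick plus one biased coin flip per sample
        cols = np.random.randint(0, len(alias), size)
        coin = np.random.rand(size)
        return np.where(coin < events[cols], cols, alias[cols])

    alias = np.load("alias.npy")    # produced by dump_graph.py in --outpath
    events = np.load("events.npy")
    neg_items = alias_draw(alias, events, size=32)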