diff --git a/PaddleRec/din/.run_ce.sh b/PaddleRec/ctr/din/.run_ce.sh similarity index 100% rename from PaddleRec/din/.run_ce.sh rename to PaddleRec/ctr/din/.run_ce.sh diff --git a/PaddleRec/din/README.md b/PaddleRec/ctr/din/README.md similarity index 100% rename from PaddleRec/din/README.md rename to PaddleRec/ctr/din/README.md diff --git a/PaddleRec/din/__init__.py b/PaddleRec/ctr/din/__init__.py similarity index 100% rename from PaddleRec/din/__init__.py rename to PaddleRec/ctr/din/__init__.py diff --git a/PaddleRec/din/_ce.py b/PaddleRec/ctr/din/_ce.py similarity index 78% rename from PaddleRec/din/_ce.py rename to PaddleRec/ctr/din/_ce.py index 9d5850fd22c3d023eb866fa474b6f6f586ca326e..e331d1bb7cccce5ac914dfa3417fe9090bd9cf99 100644 --- a/PaddleRec/din/_ce.py +++ b/PaddleRec/ctr/din/_ce.py @@ -6,18 +6,19 @@ sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi - -each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.08, 0, actived=True) +each_pass_duration_card1_kpi = DurationKpi( + 'each_pass_duration_card1', 0.08, 0, actived=True) train_loss_card1_kpi = CostKpi('train_loss_card1', 0.08, 0) -each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.08, 0, actived=True) +each_pass_duration_card4_kpi = DurationKpi( + 'each_pass_duration_card4', 0.08, 0, actived=True) train_loss_card4_kpi = CostKpi('train_loss_card4', 0.08, 0) tracking_kpis = [ - each_pass_duration_card1_kpi, - train_loss_card1_kpi, - each_pass_duration_card4_kpi, - train_loss_card4_kpi, - ] + each_pass_duration_card1_kpi, + train_loss_card1_kpi, + each_pass_duration_card4_kpi, + train_loss_card4_kpi, +] def parse_log(log): diff --git a/PaddleRec/din/cluster_train.py b/PaddleRec/ctr/din/cluster_train.py similarity index 100% rename from PaddleRec/din/cluster_train.py rename to PaddleRec/ctr/din/cluster_train.py diff --git a/PaddleRec/din/cluster_train.sh b/PaddleRec/ctr/din/cluster_train.sh similarity index 100% rename from PaddleRec/din/cluster_train.sh rename to PaddleRec/ctr/din/cluster_train.sh diff --git a/PaddleRec/din/data/build_dataset.py b/PaddleRec/ctr/din/data/build_dataset.py similarity index 100% rename from PaddleRec/din/data/build_dataset.py rename to PaddleRec/ctr/din/data/build_dataset.py diff --git a/PaddleRec/din/data/convert_pd.py b/PaddleRec/ctr/din/data/convert_pd.py similarity index 100% rename from PaddleRec/din/data/convert_pd.py rename to PaddleRec/ctr/din/data/convert_pd.py diff --git a/PaddleRec/din/data/data_process.sh b/PaddleRec/ctr/din/data/data_process.sh similarity index 100% rename from PaddleRec/din/data/data_process.sh rename to PaddleRec/ctr/din/data/data_process.sh diff --git a/PaddleRec/din/data/remap_id.py b/PaddleRec/ctr/din/data/remap_id.py similarity index 100% rename from PaddleRec/din/data/remap_id.py rename to PaddleRec/ctr/din/data/remap_id.py diff --git a/PaddleRec/din/infer.py b/PaddleRec/ctr/din/infer.py similarity index 92% rename from PaddleRec/din/infer.py rename to PaddleRec/ctr/din/infer.py index f0bbde2727a9ccc4a5f02e21982b4448f02d90b6..bccc1d648a89092cf8afb7cbfa8a8a6b0c7e68b2 100644 --- a/PaddleRec/din/infer.py +++ b/PaddleRec/ctr/din/infer.py @@ -79,16 +79,18 @@ def infer(): fetch_targets] = fluid.io.load_inference_model(model_path, exe) loader = fluid.io.DataLoader.from_generator( - feed_list=[inference_program.block(0).var(e) for e in feed_target_names], capacity=10000, iterable=True) + feed_list=[ + inference_program.block(0).var(e) for e in feed_target_names + ], + capacity=10000, + iterable=True) loader.set_sample_list_generator(data_reader, places=place) loss_sum = 0.0 score = [] count = 0 for data in loader(): - res = exe.run(inference_program, - feed=data, - fetch_list=fetch_targets) + res = exe.run(inference_program, feed=data, fetch_list=fetch_targets) loss_sum += res[0] label_data = list(np.array(data[0]["label"])) for i in range(len(label_data)): diff --git a/PaddleRec/din/network.py b/PaddleRec/ctr/din/network.py similarity index 93% rename from PaddleRec/din/network.py rename to PaddleRec/ctr/din/network.py index 17d4ae59ae924dae139b0be018ad7475062963a6..2d5c95421a65697e8d9fcea76aae30af388171bf 100644 --- a/PaddleRec/din/network.py +++ b/PaddleRec/ctr/din/network.py @@ -14,6 +14,7 @@ import paddle.fluid as fluid + def din_attention(hist, target_expand, mask): """activation weight""" @@ -61,14 +62,10 @@ def network(item_count, cat_count): name="hist_item_seq", shape=[None, seq_len], dtype="int64") hist_cat_seq = fluid.data( name="hist_cat_seq", shape=[None, seq_len], dtype="int64") - target_item = fluid.data( - name="target_item", shape=[None], dtype="int64") - target_cat = fluid.data( - name="target_cat", shape=[None], dtype="int64") - label = fluid.data( - name="label", shape=[None, 1], dtype="float32") - mask = fluid.data( - name="mask", shape=[None, seq_len, 1], dtype="float32") + target_item = fluid.data(name="target_item", shape=[None], dtype="int64") + target_cat = fluid.data(name="target_cat", shape=[None], dtype="int64") + label = fluid.data(name="label", shape=[None, 1], dtype="float32") + mask = fluid.data(name="mask", shape=[None, seq_len, 1], dtype="float32") target_item_seq = fluid.data( name="target_item_seq", shape=[None, seq_len], dtype="int64") target_cat_seq = fluid.data( diff --git a/PaddleRec/din/reader.py b/PaddleRec/ctr/din/reader.py similarity index 94% rename from PaddleRec/din/reader.py rename to PaddleRec/ctr/din/reader.py index 07d1c33c904517e863967b93cbb0e4fe2001e058..02122434f51e12b07ef8b238feecd53b0ad62bc4 100644 --- a/PaddleRec/din/reader.py +++ b/PaddleRec/ctr/din/reader.py @@ -18,6 +18,7 @@ import numpy as np import paddle import pickle + def pad_batch_data(input, max_len): res = np.array([x + [0] * (max_len - len(x)) for x in input]) res = res.astype("int64").reshape([-1, max_len]) @@ -33,11 +34,9 @@ def make_data(b): [[0] * x + [-1e9] * (max_len - x) for x in len_array]).reshape( [-1, max_len, 1]) target_item_seq = np.array( - [[x[2]] * max_len for x in b]).astype("int64").reshape( - [-1, max_len]) + [[x[2]] * max_len for x in b]).astype("int64").reshape([-1, max_len]) target_cat_seq = np.array( - [[x[3]] * max_len for x in b]).astype("int64").reshape( - [-1, max_len]) + [[x[3]] * max_len for x in b]).astype("int64").reshape([-1, max_len]) res = [] for i in range(len(b)): res.append([ diff --git a/PaddleRec/din/train.py b/PaddleRec/ctr/din/train.py similarity index 85% rename from PaddleRec/din/train.py rename to PaddleRec/ctr/din/train.py index 4b0b2854e7dfb3ad2189e43791d5324bf2e59300..a519721101b14f4d6de717f8a1f061ece5e85e7f 100644 --- a/PaddleRec/din/train.py +++ b/PaddleRec/ctr/din/train.py @@ -33,11 +33,20 @@ logger.setLevel(logging.INFO) def parse_args(): parser = argparse.ArgumentParser("din") parser.add_argument( - '--config_path', type=str, default='data/config.txt', help='dir of config') + '--config_path', + type=str, + default='data/config.txt', + help='dir of config') parser.add_argument( - '--train_dir', type=str, default='data/paddle_train.txt', help='dir of train file') + '--train_dir', + type=str, + default='data/paddle_train.txt', + help='dir of train file') parser.add_argument( - '--model_dir', type=str, default='din_amazon', help='dir of saved model') + '--model_dir', + type=str, + default='din_amazon', + help='dir of saved model') parser.add_argument( '--batch_size', type=int, default=16, help='number of batch size') parser.add_argument( @@ -45,15 +54,19 @@ def parse_args(): parser.add_argument( '--use_cuda', type=int, default=0, help='whether to use gpu') parser.add_argument( - '--parallel', type=int, default=0, help='whether to use parallel executor') + '--parallel', + type=int, + default=0, + help='whether to use parallel executor') parser.add_argument( '--base_lr', type=float, default=0.85, help='based learning rate') parser.add_argument( '--num_devices', type=int, default=1, help='Number of GPU devices') parser.add_argument( - '--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.') - parser.add_argument( - '--batch_num', type=int, help="batch num for ce") + '--enable_ce', + action='store_true', + help='If set, run the task with continuous evaluation logs.') + parser.add_argument('--batch_num', type=int, help="batch num for ce") args = parser.parse_args() return args @@ -133,8 +146,8 @@ def train(): if (global_step > 400000 and global_step % PRINT_STEP == 0) or ( global_step <= 400000 and global_step % 50000 == 0): - save_dir = os.path.join(args.model_dir, "global_step_" + str( - global_step)) + save_dir = os.path.join(args.model_dir, + "global_step_" + str(global_step)) feed_var_name = [ "hist_item_seq", "hist_cat_seq", "target_item", "target_cat", "label", "mask", "target_item_seq", @@ -156,10 +169,8 @@ def train(): ce_time = total_time[-1] except: print("ce info error") - print("kpis\teach_pass_duration_card%s\t%s" % - (gpu_num, ce_time)) - print("kpis\ttrain_loss_card%s\t%s" % - (gpu_num, ce_loss)) + print("kpis\teach_pass_duration_card%s\t%s" % (gpu_num, ce_time)) + print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, ce_loss)) def get_cards(args):