diff --git a/PaddleRec/multi-task/MMoE/README.md b/PaddleRec/multi-task/MMoE/README.md
index 08b884e97b241d13cf90f8611b32a0bb32c5351d..586cda780e8833916bee93c1f65f74a3ff8ef1e7 100644
--- a/PaddleRec/multi-task/MMoE/README.md
+++ b/PaddleRec/multi-task/MMoE/README.md
@@ -50,7 +50,7 @@
 train_path="data/census-income.data"
 test_path="data/census-income.test"
 train_data_path="train_data/"
 test_data_path="test_data/"
-
+pip install -r requirements.txt
 wget -P data/ https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census.tar.gz
 tar -zxvf data/census.tar.gz -C data/
diff --git a/PaddleRec/multi-task/MMoE/args.py b/PaddleRec/multi-task/MMoE/args.py
index 1fbb7b5ca0f75c604683faee037de435a05fc1fa..d20eca4acf67822b51442c163217f5f30178af37 100644
--- a/PaddleRec/multi-task/MMoE/args.py
+++ b/PaddleRec/multi-task/MMoE/args.py
@@ -30,9 +30,9 @@ def parse_args():
     parser.add_argument("--epochs", type=int, default=400, help="epochs")
     parser.add_argument("--batch_size", type=int, default=32, help="batch_size")
     parser.add_argument('--use_gpu', type=int, default=0, help='whether using gpu')
-    parser.add_argument('--model_dir',type=str, default='./model_dir', help="model_dir")
-    parser.add_argument('--train_data_path',type=str, default='./train_data', help="train_data_path")
-    parser.add_argument('--test_data_path',type=str, default='./test_data', help="test_data_path")
+    parser.add_argument('--model_dir',type=str, default='model_dir', help="model_dir")
+    parser.add_argument('--train_data_path',type=str, default='train_data', help="train_data_path")
+    parser.add_argument('--test_data_path',type=str, default='test_data', help="test_data_path")
 
     args = parser.parse_args()
     return args
@@ -43,6 +43,5 @@ def data_preparation_args():
     parser.add_argument('--train_data_path',type=str, default='', help="train_data_path")
     parser.add_argument('--test_data_path',type=str, default='', help="test_data_path")
-    parser.add_argument('--validation_data_path',type=str, default='', help="validation_data_path")
 
     args = parser.parse_args()
     return args
diff --git a/PaddleRec/multi-task/MMoE/create_data.sh b/PaddleRec/multi-task/MMoE/create_data.sh
index 95b53d0e7fe59fc825f0ca39cad26c8b01a0970e..b60d42b37057593b1c16aa5fd91b8217a5a71bbf 100644
--- a/PaddleRec/multi-task/MMoE/create_data.sh
+++ b/PaddleRec/multi-task/MMoE/create_data.sh
@@ -5,7 +5,6 @@
 train_path="data/census-income.data"
 test_path="data/census-income.test"
 train_data_path="train_data/"
 test_data_path="test_data/"
-
 pip install -r requirements.txt
 wget -P data/ https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census.tar.gz
diff --git a/PaddleRec/multi-task/MMoE/data_preparation.py b/PaddleRec/multi-task/MMoE/data_preparation.py
index a89dbbe43fd7c66330a6c0f2b52af4a396cb14db..1932e81570d13bd441306819255af835eaa20237 100644
--- a/PaddleRec/multi-task/MMoE/data_preparation.py
+++ b/PaddleRec/multi-task/MMoE/data_preparation.py
@@ -18,8 +18,7 @@ def fun2(x):
     return 0
 
 
-def data_preparation(train_path, test_path, train_data_path, test_data_path,
-                     validation_data_path):
+def data_preparation(train_path, test_path, train_data_path, test_data_path):
     # The column names are from
     # https://www2.1010data.com/documentationcenter/prod/Tutorials/MachineLearningExamples/CensusIncomeDataSet.html
     column_names = [
@@ -102,4 +101,4 @@ def data_preparation(train_path, test_path, train_data_path, test_data_path,
 
 args = data_preparation_args()
 data_preparation(args.train_path, args.test_path, args.train_data_path,
-                 args.test_data_path, args.validation_data_path)
+                 args.test_data_path)
diff --git a/PaddleRec/multi-task/MMoE/mmoe_train.py b/PaddleRec/multi-task/MMoE/mmoe_train.py
index 5a4fc8887abdcf6f5dbc98d678ba9c9b6661be0a..c3a1d00b8d06525199acb21bfd0d418c6a0de0da 100644
--- a/PaddleRec/multi-task/MMoE/mmoe_train.py
+++ b/PaddleRec/multi-task/MMoE/mmoe_train.py
@@ -69,8 +69,11 @@ def MMOE(feature_size=499,expert_num=8, gate_num=2, expert_size=16, tower_size=8
     label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
     label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
 
-    auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=output_layers[0], label=fluid.layers.cast(x=label_income_1, dtype='int64'))
-    auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=output_layers[1], label=fluid.layers.cast(x=label_marital_1, dtype='int64'))
+    pred_income = fluid.layers.clip(output_layers[0], min=1e-10, max=1.0 - 1e-10)
+    pred_marital = fluid.layers.clip(output_layers[1], min=1e-10, max=1.0 - 1e-10)
+
+    auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64'))
+    auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64'))
 
     avg_cost_income = fluid.layers.mean(x=cost_income)
     avg_cost_marital = fluid.layers.mean(x=cost_marital)
@@ -116,7 +119,6 @@ test_loader = fluid.io.DataLoader.from_generator(feed_list=data_list, capacity=b
 test_loader.set_sample_list_generator(test_reader, places=place)
 auc_income_list = []
 auc_marital_list = []
-mmoe_res_file = open('mmoe_res.txt', 'w',encoding='utf-8')
 for epoch in range(epochs):
     for var in auc_states_1: # reset auc states
         set_zero(var.name,place=place)
diff --git a/PaddleRec/multi-task/Share_bottom/README.md b/PaddleRec/multi-task/Share_bottom/README.md
index 1d2182d74e4a9dc687af58bb9639c1194d466d7c..a996144da1f92fb9ff9bb120965882d088043598 100644
--- a/PaddleRec/multi-task/Share_bottom/README.md
+++ b/PaddleRec/multi-task/Share_bottom/README.md
@@ -50,7 +50,7 @@
 train_path="data/census-income.data"
 test_path="data/census-income.test"
 train_data_path="train_data/"
 test_data_path="test_data/"
-
+pip install -r requirements.txt
 wget -P data/ https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census.tar.gz
 tar -zxvf data/census.tar.gz -C data/
diff --git a/PaddleRec/multi-task/Share_bottom/args.py b/PaddleRec/multi-task/Share_bottom/args.py
index 65c8c68683cdecd07c356010d7479aa968b3f7a4..5ea0698dcf29a14d39e7ccd4cdd97f947a9ac3c2 100644
--- a/PaddleRec/multi-task/Share_bottom/args.py
+++ b/PaddleRec/multi-task/Share_bottom/args.py
@@ -40,15 +40,7 @@ def data_preparation_args():
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("--train_path", type=str, default='', help="train_path")
     parser.add_argument("--test_path", type=str, default='', help="test_path")
-
-    parser.add_argument(
-        '--train_data_path', type=str, default='train_data', help="train_data_path")
-    parser.add_argument(
-        '--test_data_path', type=str, default='test_data', help="test_data_path")
-    parser.add_argument(
-        '--validation_data_path',
-        type=str,
-        default='',
-        help="validation_data_path")
+    parser.add_argument('--train_data_path', type=str, default='train_data', help="train_data_path")
+    parser.add_argument('--test_data_path', type=str, default='test_data', help="test_data_path")
     args = parser.parse_args()
     return args
diff --git a/PaddleRec/multi-task/Share_bottom/data_preparation.py b/PaddleRec/multi-task/Share_bottom/data_preparation.py
index a89dbbe43fd7c66330a6c0f2b52af4a396cb14db..406ee00b096c5bf3ed21420d8917ef068204a186 100644
--- a/PaddleRec/multi-task/Share_bottom/data_preparation.py
+++ b/PaddleRec/multi-task/Share_bottom/data_preparation.py
@@ -18,8 +18,7 @@ def fun2(x):
     return 0
 
 
-def data_preparation(train_path, test_path, train_data_path, test_data_path,
-                     validation_data_path):
+def data_preparation(train_path, test_path, train_data_path, test_data_path):
     # The column names are from
     # https://www2.1010data.com/documentationcenter/prod/Tutorials/MachineLearningExamples/CensusIncomeDataSet.html
     column_names = [
@@ -101,5 +100,4 @@ def data_preparation(train_path, test_path, train_data_path, test_data_path,
 
 
 args = data_preparation_args()
-data_preparation(args.train_path, args.test_path, args.train_data_path,
-                 args.test_data_path, args.validation_data_path)
+data_preparation(args.train_path, args.test_path, args.train_data_path, args.test_data_path)
diff --git a/PaddleRec/multi-task/Share_bottom/share_bottom.py b/PaddleRec/multi-task/Share_bottom/share_bottom.py
index 9a464834cab476c0b3bb98769ac72b4be47bf76c..e0dcafbf94dba0b792cbbad613f901ece753de4e 100644
--- a/PaddleRec/multi-task/Share_bottom/share_bottom.py
+++ b/PaddleRec/multi-task/Share_bottom/share_bottom.py
@@ -56,8 +56,11 @@ def share_bottom(feature_size=499,bottom_size=117,tower_nums=2,tower_size=8):
     label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
     label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
 
-    auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=output_layers[0], label=fluid.layers.cast(x=label_income_1, dtype='int64'))
-    auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=output_layers[1], label=fluid.layers.cast(x=label_marital_1, dtype='int64'))
+    pred_income = fluid.layers.clip(output_layers[0], min=1e-10, max=1.0 - 1e-10)
+    pred_marital = fluid.layers.clip(output_layers[1], min=1e-10, max=1.0 - 1e-10)
+
+    auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64'))
+    auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64'))
 
     avg_cost_income = fluid.layers.mean(x=cost_income)
     avg_cost_marital = fluid.layers.mean(x=cost_marital)
diff --git a/PaddleRec/multi-task/Share_bottom/train_gpu.sh b/PaddleRec/multi-task/Share_bottom/train_gpu.sh
index 8cd4927efa80e4af0f05c52ead988804f5a00836..a8c6cb3dcabaea3235828185f29548194e6edb03 100644
--- a/PaddleRec/multi-task/Share_bottom/train_gpu.sh
+++ b/PaddleRec/multi-task/Share_bottom/train_gpu.sh
@@ -1,5 +1,5 @@
-python share_bottom.py --use_gpu 1 \
+CUDA_VISIBLE_DEVICES=0 python share_bottom.py --use_gpu 1 \
     --epochs 100 \
-    --train_data_path '.train_data' \
-    --test_data_path '.test_data' \
+    --train_data_path 'train_data' \
+    --test_data_path 'test_data' \
     --model_dir 'model_dir' \