未验证 提交 53723856 编写于 作者: O overlordmax 提交者: GitHub

Mmoe 04162304 (#4539)

* fix some bugs

* edit README.md
上级 282e4890
......@@ -4,6 +4,7 @@
```
├── README.md # 文档
├── requirements.txt # 需要的安装包
├── mmoe_train.py # mmoe模型脚本
├── utils # 通用函数
├── args # 参数脚本
......@@ -21,15 +22,15 @@
1.income
> best:0.94856
> max_mmoe_test_auc_income:0.94937
>
> mean:0.944105
> mean_mmoe_test_auc_income:0.94465
2.marital
> best:0.99403
> max_mmoe_test_auc_marital:0.99419
>
> mean:0.99324
> mean_mmoe_test_auc_marital:0.99324
本项目支持GPU和CPU两种单机训练环境。
......@@ -74,15 +75,16 @@ GPU环境
```sh
CUDA_VISIBLE_DEVICES=0 python mmoe_train.py --use_gpu 1 \ #使用gpu训练
--train_path data/data24913/train_data/\ #训练数据路径
--test_path data/data24913/test_data/\ #测试数据路径
--feature_size 499\ #设置特征的维度
--batch_size 32\ #设置batch_size大小
--expert_num 8\ #设置expert数量
--gate_num 2\ #设置gate数量
--expert_size 16\ #设置expert网络大小
--tower_size 8\ #设置tower网络大小
--epochs 400 #设置epoch轮次
--train_data_path 'train_data'\ #训练数据路径
--test_data_path 'test_data'\ #测试数据路径
--model_dir 'model_dir'\ #模型保存地址
--feature_size 499\ #设置特征的维度
--batch_size 32\ #设置batch_size大小
--expert_num 8\ #设置expert数量
--gate_num 2\ #设置gate数量
--expert_size 16\ #设置expert网络大小
--tower_size 8\ #设置tower网络大小
--epochs 400 #设置epoch轮次
```
修改脚本的可执行权限并运行
......@@ -97,15 +99,16 @@ CPU环境
```sh
python mmoe_train.py --use_gpu 0 \ #使用cpu训练
--train_path data/data24913/train_data/\ #训练数据路径
--test_path data/data24913/test_data/\ #测试数据路径
--feature_size 499\ #设置特征的维度
--batch_size 32\ #设置batch_size大小
--expert_num 8\ #设置expert数量
--gate_num 2\ #设置gate数量
--expert_size 16\ #设置expert网络大小
--tower_size 8\ #设置tower网络大小
--epochs 400 #设置epoch轮次
--train_data_path 'train_data'\ #训练数据路径
--test_data_path 'test_data'\ #测试数据路径
--model_dir 'model_dir'\ #模型保存地址
--feature_size 499\ #设置特征的维度
--batch_size 32\ #设置batch_size大小
--expert_num 8\ #设置expert数量
--gate_num 2\ #设置gate数量
--expert_size 16\ #设置expert网络大小
--tower_size 8\ #设置tower网络大小
--epochs 400 #设置epoch轮次
```
修改脚本的可执行权限并运行
......@@ -124,6 +127,13 @@ python train_mmoe.py --use_gpu 0 \ #使用cpu训练
epoch设置为100的训练和测试效果如下:
![](./image/mmoe.png)
![](./image/mmoe2.png)
\ No newline at end of file
```text
batch_size:[32],feature_size:[499],expert_num:[8],gate_num[2],expert_size[16],tower_size[8],epochs:[100]
2020-04-16 11:28:06,- INFO - epoch_id: 0,epoch_time: 129.17434 s,loss: 0.62215,train_auc_income: 0.86302,train_auc_marital: 0.92316,test_auc_income: 0.84525,test_auc_marital: 0.98269
2020-04-16 11:30:36,- INFO - epoch_id: 1,epoch_time: 149.79017 s,loss: 0.42484,train_auc_income: 0.90634,train_auc_marital: 0.98418,test_auc_income:
......
2020-04-16 15:31:23,- INFO - epoch_id: 97,epoch_time: 147.07304 s,loss: 0.30267,train_auc_income: 0.94743,train_auc_marital: 0.99430,test_auc_income: 0.94905,test_auc_marital: 0.99414
2020-04-16 15:33:51,- INFO - epoch_id: 98,epoch_time: 148.34412 s,loss: 0.29688,train_auc_income: 0.94736,train_auc_marital: 0.99433,test_auc_income: 0.94846,test_auc_marital: 0.99409
2020-04-16 15:36:21,- INFO - epoch_id: 99,epoch_time: 149.91047 s,loss: 0.31330,train_auc_income: 0.94732,train_auc_marital: 0.99403,test_auc_income: 0.94881,test_auc_marital: 0.99386
2020-04-16 15:36:21,- INFO - mean_mmoe_test_auc_income: 0.94465,mean_mmoe_test_auc_marital 0.99324,max_mmoe_test_auc_income: 0.94937,max_mmoe_test_auc_marital 0.99419
```
......@@ -30,8 +30,9 @@ def parse_args():
parser.add_argument("--epochs", type=int, default=400, help="epochs")
parser.add_argument("--batch_size", type=int, default=32, help="batch_size")
parser.add_argument('--use_gpu', type=int, default=0, help='whether using gpu')
parser.add_argument('--train_data_path',type=str, default='./data/data24913/train_data/', help="train_data_path")
parser.add_argument('--test_data_path',type=str, default='./data/data24913/test_data/', help="test_data_path")
parser.add_argument('--model_dir',type=str, default='./model_dir', help="model_dir")
parser.add_argument('--train_data_path',type=str, default='./train_data', help="train_data_path")
parser.add_argument('--test_data_path',type=str, default='./test_data', help="test_data_path")
args = parser.parse_args()
return args
......
......@@ -6,6 +6,8 @@ test_path="data/census-income.test"
train_data_path="train_data/"
test_data_path="test_data/"
pip install -r requirements.txt
wget -P data/ https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census.tar.gz
tar -zxvf data/census.tar.gz -C data/
......
import pandas as pd
import numpy as np
import paddle.fluid as fluid
from sklearn.preprocessing import MinMaxScaler
from args import *
......
......@@ -3,14 +3,10 @@ import pandas as pd
import numpy as np
import paddle
import time
import datetime
import os
import utils
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from args import *
import warnings
warnings.filterwarnings("ignore")
#显示所有列
pd.set_option('display.max_columns', None)
def set_zero(var_name,scope=fluid.global_scope(),place=fluid.CPUPlace(),param_type="int64"):
"""
......@@ -118,9 +114,9 @@ loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(feed_list=data_list, capacity=batch_size, iterable=True)
test_loader.set_sample_list_generator(test_reader, places=place)
mean_auc_income = []
mean_auc_marital = []
auc_income_list = []
auc_marital_list = []
mmoe_res_file = open('mmoe_res.txt', 'w',encoding='utf-8')
for epoch in range(epochs):
for var in auc_states_1: # reset auc states
set_zero(var.name,place=place)
......@@ -150,12 +146,21 @@ for epoch in range(epochs):
feed=test_data,
fetch_list=[out_1,out_2,label_1,label_2,auc_income,auc_marital],
return_numpy=True)
mean_auc_income.append(test_auc_1_p)
mean_auc_marital.append(test_auc_2_p)
model_dir = os.path.join(args.model_dir,'epoch_' + str(epoch + 1), "checkpoint")
main_program = fluid.default_main_program()
fluid.io.save(main_program,model_dir)
auc_income_list.append(test_auc_1_p)
auc_marital_list.append(test_auc_2_p)
end = time.time()
print("epoch_id:[%d],epoch_time:[%.5f s],loss:[%.5f],train_auc_income:[%.5f],train_auc_marital:[%.5f],test_auc_income:[%.5f],test_auc_marital:[%.5f]"%
(epoch,end - begin,loss_data,auc_1_p,auc_2_p,test_auc_1_p,test_auc_2_p))
print("mean_auc_income:[%.5f],mean_auc_marital[%.5f]"%(np.mean(mean_auc_income),np.mean(mean_auc_marital)))
time_stamp = datetime.datetime.now()
print("%s,- INFO - epoch_id: %d,epoch_time: %.5f s,loss: %.5f,train_auc_income: %.5f,train_auc_marital: %.5f,test_auc_income: %.5f,test_auc_marital: %.5f"%
(time_stamp.strftime('%Y-%m-%d %H:%M:%S'),epoch,end - begin,loss_data,auc_1_p,auc_2_p,test_auc_1_p,test_auc_2_p))
time_stamp = datetime.datetime.now()
print("%s,- INFO - mean_mmoe_test_auc_income: %.5f,mean_mmoe_test_auc_marital %.5f,max_mmoe_test_auc_income: %.5f,max_mmoe_test_auc_marital %.5f"%(
time_stamp.strftime('%Y-%m-%d %H:%M:%S'),np.mean(auc_income_list),np.mean(auc_marital_list),np.max(auc_income_list),np.max(auc_marital_list)))
......
pandas==0.23.4
\ No newline at end of file
python mmoe_train.py --use_gpu 0\
--train_data_path 'train_data'\
--test_data_path 'test_data'\
--feature_size 499\
--batch_size 32\
--expert_num 8\
--gate_num 2\
--expert_size 16\
--tower_size 8\
--epochs 100
--train_data_path 'train_data'\
--test_data_path 'test_data'\
--model_dir 'model_dir'\
--feature_size 499\
--batch_size 32\
--expert_num 8\
--gate_num 2\
--expert_size 16\
--tower_size 8\
--epochs 100
CUDA_VISIBLE_DEVICES=0 python mmoe_train.py --use_gpu 1\
--train_data_path 'train_data'\
--test_data_path 'test_data'\
--model_dir 'model_dir'\
--feature_size 499\
--batch_size 32\
--expert_num 8\
......
......@@ -5,18 +5,14 @@ import os
import paddle.fluid as fluid
import io
from itertools import islice
from sklearn.preprocessing import MinMaxScaler
import warnings
##按行读取文件
def reader_creator(file_dir):
def reader():
files = os.listdir(file_dir)
for fi in files:
with io.open(
os.path.join(file_dir, fi), "r", encoding='utf-8') as f:
for l in islice(f, 1, None): ##忽略第一行
for l in islice(f, 1, None):
l = l.strip().split(',')
l = list(map(float, l))
label_income = []
......@@ -36,8 +32,6 @@ def reader_creator(file_dir):
return reader
##读取一个batch
def batch_reader(reader, batch_size):
def batch_reader():
r = reader()
......@@ -47,14 +41,8 @@ def batch_reader(reader, batch_size):
if (len(b) == batch_size):
yield b
b = []
#if len(b) != 0:
# yield b
#
return batch_reader
##准备数据
def prepare_reader(data_path, batch_size):
data_set = reader_creator(data_path)
#random.shuffle(data_set)
return batch_reader(data_set, batch_size)
......@@ -4,6 +4,7 @@
```
├── README.md # 文档
├── requirements.txt # 需要的安装包
├── share_bottom.py # mmoe模型脚本
├── utils # 通用函数
├── args # 参数脚本
......@@ -21,15 +22,15 @@ share_bottom是多任务学习的基本框架,其特点是对于不同的任
1.income
>best:0.94899
>max_sb_test_auc_income:0.94993
>
>mean:0.94402
>mean_sb_test_auc_income: 0.93120
2.marital
> best:0.99394
> max_sb_test_auc_marital:0.99384
>
> mean:0.99311
> mean_sb_test_auc_marital:0.99256
本项目支持GPU和CPU两种单机训练环境。
......@@ -74,8 +75,9 @@ GPU环境
```sh
python share_bottom.py --use_gpu 1\ #使用gpu训练
--train_path data/data24913/train_data/\ #训练数据路径
--test_path data/data24913/test_data/\ #测试数据路径
--train_path 'train_data'\ #训练数据路径
--test_path 'test_data'\ #测试数据路径
--model_dir 'model_dir'\ #模型保存地址
--batch_size 32\ #设置batch_size大小
--feature_size 499\ #设置特征维度
--bottom_size 117\ #设置bottom网络大小
......@@ -96,8 +98,9 @@ CPU环境
```sh
python share_bottom.py --use_gpu 0\ #使用cpu训练
--train_path data/data24913/train_data/\ #训练数据路径
--test_path data/data24913/test_data/\ #测试数据路径
--train_path 'train_data'\ #训练数据路径
--test_path 'test_data'\ #测试数据路径
--model_dir 'model_dir'\ #模型保存地址
--batch_size 32\ #设置batch_size大小
--feature_size 499\ #设置特征维度
--bottom_size 117\ #设置bottom网络大小
......@@ -122,6 +125,14 @@ python share_bottom.py --use_gpu 0\ #使用cpu训练
epoch设置为100的训练和测试效果如下:
![](./image/share_bottom.png)
```text
batch_size:[32],epochs:[100],feature_size:[499],bottom_size:[117],tower_nums:[2],tower_size:[8]
2020-04-16 16:01:04,- INFO - epoch_id: 0,epoch_time: 77.17624 s,loss: 0.62643,train_auc_income: 0.49442,train_auc_marital: 0.93509,test_auc_income: 0.50000,test_auc_marital: 0.93920
2020-04-16 16:02:23,- INFO - epoch_id: 1,epoch_time: 78.84795 s,loss: 0.47955,train_auc_income: 0.49721,train_auc_marital: 0.98118,test_auc_income: 0.50000,test_auc_marital: 0.98804
2020-04-16 16:03:43,- INFO - epoch_id: 2,epoch_time: 79.67485 s,loss:
......
2020-04-16 18:22:36,- INFO - epoch_id: 98,epoch_time: 85.56907 s,loss: 0.30696,train_auc_income: 0.94701,train_auc_marital: 0.99425,test_auc_income: 0.94919,test_auc_marital: 0.99376
2020-04-16 18:24:02,- INFO - epoch_id: 99,epoch_time: 86.08858 s,loss: 0.29395,train_auc_income: 0.94736,train_auc_marital: 0.99422,test_auc_income: 0.94908,test_auc_marital: 0.99383
2020-04-16 18:24:02,- INFO - mean_sb_test_auc_income: 0.93120,mean_sb_test_auc_marital 0.99256,max_sb_test_auc_income: 0.94993,max_sb_test_auc_marital 0.99384
```
![](./image/share_bottom2.png)
\ No newline at end of file
......@@ -29,16 +29,9 @@ def parse_args():
parser.add_argument("--epochs", type=int, default=400, help="epochs")
parser.add_argument("--batch_size", type=int, default=32, help="batch_size")
parser.add_argument('--use_gpu', type=int, default=0, help='whether using gpu')
parser.add_argument(
'--train_data_path',
type=str,
default=' ',
help="train_data_path")
parser.add_argument(
'--test_data_path',
type=str,
default=' ',
help="test_data_path")
parser.add_argument('--train_data_path',type=str,default='train_data',help="train_data_path")
parser.add_argument('--test_data_path',type=str,default='test_data',help="test_data_path")
parser.add_argument('--model_dir',type=str, default='model_dir', help="model_dir")
args = parser.parse_args()
return args
......@@ -49,9 +42,9 @@ def data_preparation_args():
parser.add_argument("--test_path", type=str, default='', help="test_path")
parser.add_argument(
'--train_data_path', type=str, default='', help="train_data_path")
'--train_data_path', type=str, default='train_data', help="train_data_path")
parser.add_argument(
'--test_data_path', type=str, default='', help="test_data_path")
'--test_data_path', type=str, default='test_data', help="test_data_path")
parser.add_argument(
'--validation_data_path',
type=str,
......
......@@ -5,6 +5,7 @@ train_path="data/census-income.data"
test_path="data/census-income.test"
train_data_path="train_data/"
test_data_path="test_data/"
pip install -r requirements.txt
wget -P data/ https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census.tar.gz
tar -zxvf data/census.tar.gz -C data/
......
import pandas as pd
import numpy as np
import paddle.fluid as fluid
from sklearn.preprocessing import MinMaxScaler
from args import *
......
pandas==0.23.4
\ No newline at end of file
import paddle.fluid as fluid
import pandas as pd
import numpy as np
import paddle
import os
import time
import datetime
import utils
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from args import *
import warnings
warnings.filterwarnings("ignore")
#显示所有列
pd.set_option('display.max_columns', None)
def set_zero(var_name,scope=fluid.global_scope(),place=fluid.CPUPlace(),param_type="int64"):
"""
......@@ -106,8 +99,8 @@ loader.set_sample_list_generator(train_reader, places=place)
test_loader = fluid.io.DataLoader.from_generator(feed_list=data_list, capacity=batch_size, iterable=True)
test_loader.set_sample_list_generator(test_reader, places=place)
mean_auc_income = []
mean_auc_marital = []
auc_income_list = []
auc_marital_list = []
for epoch in range(epochs):
begin = time.time()
......@@ -139,13 +132,20 @@ for epoch in range(epochs):
feed=test_data,
fetch_list=[out_1,out_2,label_1,label_2,auc_income,auc_marital],
return_numpy=True)
mean_auc_income.append(test_auc_1_p)
mean_auc_marital.append(test_auc_2_p)
model_dir = os.path.join(args.model_dir,'epoch_' + str(epoch + 1), "checkpoint")
main_program = fluid.default_main_program()
fluid.io.save(main_program,model_dir)
auc_income_list.append(test_auc_1_p)
auc_marital_list.append(test_auc_2_p)
end = time.time()
print("epoch_id:[%d],epoch_time:[%.5f s],loss:[%.5f],train_auc_income:[%.5f],train_auc_marital:[%.5f],test_auc_income:[%.5f],test_auc_marital:[%.5f]"%
(epoch,end - begin,loss_data,auc_1_p,auc_2_p,test_auc_1_p,test_auc_2_p))
print("mean_auc_income:[%.5f],mean_auc_marital[%.5f]"%(np.mean(mean_auc_income),np.mean(mean_auc_marital)))
time_stamp = datetime.datetime.now()
print("%s,- INFO - epoch_id: %d,epoch_time: %.5f s,loss: %.5f,train_auc_income: %.5f,train_auc_marital: %.5f,test_auc_income: %.5f,test_auc_marital: %.5f"%
(time_stamp.strftime('%Y-%m-%d %H:%M:%S'),epoch,end - begin,loss_data,auc_1_p,auc_2_p,test_auc_1_p,test_auc_2_p))
time_stamp = datetime.datetime.now()
print("%s,- INFO - mean_sb_test_auc_income: %.5f,mean_sb_test_auc_marital %.5f,max_sb_test_auc_income: %.5f,max_sb_test_auc_marital %.5f"%(
time_stamp.strftime('%Y-%m-%d %H:%M:%S'),np.mean(auc_income_list),np.mean(auc_marital_list),np.max(auc_income_list),np.max(auc_marital_list)))
......
python share_bottom.py --use_gpu 0 \
--epochs 100 \
--train_data_path '../train_data' \
--test_data_path '../test_data' \
--batch_size 16 \
--train_data_path 'train_data' \
--test_data_path 'test_data' \
--model_dir 'model_dir' \
--batch_size 32 \
--feature_size 499 \
--bottom_size 117 \
--tower_nums 2 \
......
python share_bottom.py --use_gpu 1 \
--epochs 100 \
--train_data_path '../train_data' \
--test_data_path '../test_data' \
--batch_size 16 \
--train_data_path 'train_data' \
--test_data_path 'test_data' \
--model_dir 'model_dir' \
--batch_size 32 \
--feature_size 499 \
--bottom_size 117 \
--tower_nums 2 \
......
......@@ -5,18 +5,14 @@ import os
import paddle.fluid as fluid
import io
from itertools import islice
from sklearn.preprocessing import MinMaxScaler
import warnings
##按行读取文件
def reader_creator(file_dir):
def reader():
files = os.listdir(file_dir)
for fi in files:
with io.open(
os.path.join(file_dir, fi), "r", encoding='utf-8') as f:
for l in islice(f, 1, None): ##忽略第一行
for l in islice(f, 1, None):
l = l.strip().split(',')
l = list(map(float, l))
label_income = []
......@@ -36,8 +32,6 @@ def reader_creator(file_dir):
return reader
##读取一个batch
def batch_reader(reader, batch_size):
def batch_reader():
r = reader()
......@@ -49,9 +43,6 @@ def batch_reader(reader, batch_size):
b = []
return batch_reader
##准备数据
def prepare_reader(data_path, batch_size):
data_set = reader_creator(data_path)
#random.shuffle(data_set)
return batch_reader(data_set, batch_size)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册