Unverified commit 6732a284, authored by zhang wenhui, committed by GitHub

fix gru_dygraph (#4248)

* fix gru_dygraph

* fix mmoe doc
Parent f9853b7f
# gru4rec dygraph implementation
# Environment
paddle 1.7
# Download the data
```
wget https://paddlerec.bj.bcebos.com/gru4rec/dy_graph/data_rsc15.tar
tar xvf data_rsc15.tar
```
# Data format
The data format and preprocessing are the same as in the static-graph implementation.
# Training and prediction
```
......
```

@@ -21,6 +21,7 @@ import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.optimizer import AdagradOptimizer
from paddle.fluid.dygraph.base import to_variable
import numpy as np
import six
@@ -67,7 +68,7 @@ class SimpleGRURNN(fluid.Layer):
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
self.weight_1_arr.append(self.add_parameter('w1_%d' % i, weight_1))
weight_2 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
@@ -76,7 +77,7 @@ class SimpleGRURNN(fluid.Layer):
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_2_arr.append(self.add_parameter('w_%d' % i, weight_2))
self.weight_2_arr.append(self.add_parameter('w2_%d' % i, weight_2))
weight_3 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
@@ -85,7 +86,7 @@ class SimpleGRURNN(fluid.Layer):
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_3_arr.append(self.add_parameter('w_%d' % i, weight_3))
self.weight_3_arr.append(self.add_parameter('w3_%d' % i, weight_3))
bias_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
@@ -93,7 +94,7 @@ class SimpleGRURNN(fluid.Layer):
shape=[self._hidden_size * 2],
dtype="float32",
default_initializer=fluid.initializer.Constant(0.0))
self.bias_1_arr.append(self.add_parameter('b_%d' % i, bias_1))
self.bias_1_arr.append(self.add_parameter('b1_%d' % i, bias_1))
bias_2 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
@@ -101,7 +102,7 @@ class SimpleGRURNN(fluid.Layer):
shape=[self._hidden_size * 1],
dtype="float32",
default_initializer=fluid.initializer.Constant(0.0))
self.bias_2_arr.append(self.add_parameter('b_%d' % i, bias_2))
self.bias_2_arr.append(self.add_parameter('b2_%d' % i, bias_2))
def forward(self, input_embedding, init_hidden=None):
hidden_array = []
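For context on the renames in the hunks above: the old code registered every per-layer matrix and bias under the shared names `'w_%d'` and `'b_%d'`, so `add_parameter` received duplicate keys inside one Layer; the new suffixes (`w1_`/`w2_`/`w3_`, `b1_`/`b2_`) make each key unique, which is presumably the dygraph bug this commit fixes. A minimal sketch of the pattern, using a hypothetical `ToyGRUParams` class with simplified shapes rather than the repository's `SimpleGRURNN`:

```
import paddle.fluid as fluid


class ToyGRUParams(fluid.Layer):
    """Sketch only: register several per-layer parameters under unique names."""

    def __init__(self, num_layers=2, hidden_size=4, init_scale=0.1):
        super(ToyGRUParams, self).__init__()
        self.weight_1_arr, self.weight_2_arr = [], []
        for i in range(num_layers):
            w1 = self.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-init_scale, high=init_scale)),
                shape=[hidden_size, hidden_size * 2],
                dtype="float32",
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            # 'w1_%d' and 'w2_%d' are distinct keys, so the two registrations
            # below no longer collide the way a shared 'w_%d' name would.
            self.weight_1_arr.append(self.add_parameter('w1_%d' % i, w1))
            w2 = self.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-init_scale, high=init_scale)),
                shape=[hidden_size, hidden_size],
                dtype="float32",
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            self.weight_2_arr.append(self.add_parameter('w2_%d' % i, w2))


# Usage: dygraph layers are built and inspected under a dygraph guard.
with fluid.dygraph.guard():
    toy = ToyGRUParams()
    print([p.name for p in toy.parameters()])
```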
@@ -278,10 +279,10 @@ def train_ptb_lm():
init_scale = 0.1
max_grad_norm = 5.0
epoch_start_decay = 10
max_epoch = 3
max_epoch = 5
dropout = 0.0
lr_decay = 0.5
base_learning_rate = 1.0
base_learning_rate = 0.05
elif model_type == "medium":
num_layers = 2
batch_size = 20
@@ -353,15 +354,22 @@ def train_ptb_lm():
log_interval = total_batch_size // 20
bd = []
lr_arr = [1.0]
lr_arr = [base_learning_rate]
for i in range(1, max_epoch):
bd.append(total_batch_size * i)
new_lr = base_learning_rate * (lr_decay**
max(i + 1 - epoch_start_decay, 0.0))
lr_arr.append(new_lr)
sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr))
sgd = AdagradOptimizer(
parameter_list=ptb_model.parameters(),
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr))
print("parameters:--------------------------------")
for para in ptb_model.parameters():
print(para.name)
print("parameters:--------------------------------")
def eval(model, data):
print("begion to eval")
@@ -450,7 +458,7 @@ def train_ptb_lm():
print("Saved model to: %s.\n" % save_model_dir)
eval(ptb_model, test_data)
eval(ptb_model, test_data)
#eval(ptb_model, test_data)
train_ptb_lm()
@@ -8,6 +8,9 @@ MMoE is a classic multi-task model; the original paper [Modeling Task Relati
We implement the core idea of MMoE based on real-world industrial scenarios.
## Configuration
1.6 and above
## Data
We use randomly generated data for training; adjust the data section to fit your own dataset.
......
import paddle.fluid as fluid
import numpy as np
dict_dim = 1000
emb_dim = 64
import time
from args import *
def fc_layers(input, layers, acts, prefix):
@@ -59,7 +58,7 @@ def mmoe_layer(inputs, expert_num=8, gate_num=3):
return outs
def model():
def model(dict_dim, emb_dim):
label_like = fluid.layers.data(
name="label_like",
shape=[-1, 1],
@@ -116,13 +115,18 @@ def model():
return cost, [a_data, label_like, label_comment, label_share]
batch_size = 5
args = parse_args()
batch_size = args.batch_size
dict_dim = args.dict_dim
emb_dim = args.emb_dim
print("batch_size:[%d], dict_dim:[%d], emb_dim:[%d], learning_rate:[%.4f]" %
(batch_size, dict_dim, emb_dim, args.base_lr))
loss, data_list = model()
sgd = fluid.optimizer.SGD(learning_rate=0.001)
loss, data_list = model(dict_dim, emb_dim)
sgd = fluid.optimizer.SGD(learning_rate=args.base_lr)
sgd.minimize(loss)
use_cuda = True
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=data_list, place=place)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
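The hunk above drops the hard-coded batch_size, dict_dim, emb_dim, learning rate, and device flag in favor of values from args.parse_args(). The args module itself is not part of this diff; below is a hedged sketch of what it plausibly provides, with flag names taken from the attributes used above and defaults borrowed from the values that were previously hard-coded:

```
# args.py -- hypothetical sketch; only the flags the training script references.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="MMoE training options")
    parser.add_argument("--batch_size", type=int, default=5)
    parser.add_argument("--dict_dim", type=int, default=1000)
    parser.add_argument("--emb_dim", type=int, default=64)
    parser.add_argument("--base_lr", type=float, default=0.001)
    # Whether to run on GPU (CUDAPlace(0)) or CPU.
    parser.add_argument("--use_gpu", type=int, default=0)
    return parser.parse_args()
```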
@@ -131,6 +135,7 @@ for batch_id in range(100):
np.random.randint(
2, size=(batch_size, 1)).astype('int64') for i in range(4)
]
begin = time.time()
loss_data, = exe.run(fluid.default_main_program(),
feed={
"a": data[0],
@@ -139,4 +144,6 @@ for batch_id in range(100):
"label_share": data[3]
},
fetch_list=[loss.name])
print(batch_id, " loss:", float(np.array(loss_data)))
end = time.time()
print("batch_id:[%d], loss:[%.5f], batch_time:[%.5f s]" %
(batch_id, float(np.array(loss_data)), end - begin))