未验证 提交 6732a284 编写于 作者: Z zhang wenhui 提交者: GitHub

fix gru_dygraph (#4248)

* fix gru_dygraph

* fix mmoe doc
上级 f9853b7f
# gru4rec 动态图实现 # gru4rec 动态图实现
# 环境配置
paddle 1.7
# 下载数据 # 下载数据
``` ```
wget https://paddlerec.bj.bcebos.com/gru4rec/dy_graph/data_rsc15.tar wget https://paddlerec.bj.bcebos.com/gru4rec/dy_graph/data_rsc15.tar
tar xvf data_rsc15.tar tar xvf data_rsc15.tar
``` ```
# 数据格式
数据格式及预处理方式与静态图版本相同。
# 训练及预测 # 训练及预测
``` ```
......
...@@ -21,6 +21,7 @@ import paddle.fluid.core as core ...@@ -21,6 +21,7 @@ import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding from paddle.fluid.dygraph.nn import Embedding
import paddle.fluid.framework as framework import paddle.fluid.framework as framework
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.optimizer import AdagradOptimizer
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
import numpy as np import numpy as np
import six import six
...@@ -67,7 +68,7 @@ class SimpleGRURNN(fluid.Layer): ...@@ -67,7 +68,7 @@ class SimpleGRURNN(fluid.Layer):
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.UniformInitializer( default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)) low=-self._init_scale, high=self._init_scale))
self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) self.weight_1_arr.append(self.add_parameter('w1_%d' % i, weight_1))
weight_2 = self.create_parameter( weight_2 = self.create_parameter(
attr=fluid.ParamAttr( attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer( initializer=fluid.initializer.UniformInitializer(
...@@ -76,7 +77,7 @@ class SimpleGRURNN(fluid.Layer): ...@@ -76,7 +77,7 @@ class SimpleGRURNN(fluid.Layer):
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.UniformInitializer( default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)) low=-self._init_scale, high=self._init_scale))
self.weight_2_arr.append(self.add_parameter('w_%d' % i, weight_2)) self.weight_2_arr.append(self.add_parameter('w2_%d' % i, weight_2))
weight_3 = self.create_parameter( weight_3 = self.create_parameter(
attr=fluid.ParamAttr( attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer( initializer=fluid.initializer.UniformInitializer(
...@@ -85,7 +86,7 @@ class SimpleGRURNN(fluid.Layer): ...@@ -85,7 +86,7 @@ class SimpleGRURNN(fluid.Layer):
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.UniformInitializer( default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)) low=-self._init_scale, high=self._init_scale))
self.weight_3_arr.append(self.add_parameter('w_%d' % i, weight_3)) self.weight_3_arr.append(self.add_parameter('w3_%d' % i, weight_3))
bias_1 = self.create_parameter( bias_1 = self.create_parameter(
attr=fluid.ParamAttr( attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer( initializer=fluid.initializer.UniformInitializer(
...@@ -93,7 +94,7 @@ class SimpleGRURNN(fluid.Layer): ...@@ -93,7 +94,7 @@ class SimpleGRURNN(fluid.Layer):
shape=[self._hidden_size * 2], shape=[self._hidden_size * 2],
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.Constant(0.0)) default_initializer=fluid.initializer.Constant(0.0))
self.bias_1_arr.append(self.add_parameter('b_%d' % i, bias_1)) self.bias_1_arr.append(self.add_parameter('b1_%d' % i, bias_1))
bias_2 = self.create_parameter( bias_2 = self.create_parameter(
attr=fluid.ParamAttr( attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer( initializer=fluid.initializer.UniformInitializer(
...@@ -101,7 +102,7 @@ class SimpleGRURNN(fluid.Layer): ...@@ -101,7 +102,7 @@ class SimpleGRURNN(fluid.Layer):
shape=[self._hidden_size * 1], shape=[self._hidden_size * 1],
dtype="float32", dtype="float32",
default_initializer=fluid.initializer.Constant(0.0)) default_initializer=fluid.initializer.Constant(0.0))
self.bias_2_arr.append(self.add_parameter('b_%d' % i, bias_2)) self.bias_2_arr.append(self.add_parameter('b2_%d' % i, bias_2))
def forward(self, input_embedding, init_hidden=None): def forward(self, input_embedding, init_hidden=None):
hidden_array = [] hidden_array = []
...@@ -278,10 +279,10 @@ def train_ptb_lm(): ...@@ -278,10 +279,10 @@ def train_ptb_lm():
init_scale = 0.1 init_scale = 0.1
max_grad_norm = 5.0 max_grad_norm = 5.0
epoch_start_decay = 10 epoch_start_decay = 10
max_epoch = 3 max_epoch = 5
dropout = 0.0 dropout = 0.0
lr_decay = 0.5 lr_decay = 0.5
base_learning_rate = 1.0 base_learning_rate = 0.05
elif model_type == "medium": elif model_type == "medium":
num_layers = 2 num_layers = 2
batch_size = 20 batch_size = 20
...@@ -353,15 +354,22 @@ def train_ptb_lm(): ...@@ -353,15 +354,22 @@ def train_ptb_lm():
log_interval = total_batch_size // 20 log_interval = total_batch_size // 20
bd = [] bd = []
lr_arr = [1.0] lr_arr = [base_learning_rate]
for i in range(1, max_epoch): for i in range(1, max_epoch):
bd.append(total_batch_size * i) bd.append(total_batch_size * i)
new_lr = base_learning_rate * (lr_decay** new_lr = base_learning_rate * (lr_decay**
max(i + 1 - epoch_start_decay, 0.0)) max(i + 1 - epoch_start_decay, 0.0))
lr_arr.append(new_lr) lr_arr.append(new_lr)
sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay( sgd = AdagradOptimizer(
boundaries=bd, values=lr_arr)) parameter_list=ptb_model.parameters(),
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr))
print("parameters:--------------------------------")
for para in ptb_model.parameters():
print(para.name)
print("parameters:--------------------------------")
def eval(model, data): def eval(model, data):
print("begion to eval") print("begion to eval")
...@@ -450,7 +458,7 @@ def train_ptb_lm(): ...@@ -450,7 +458,7 @@ def train_ptb_lm():
print("Saved model to: %s.\n" % save_model_dir) print("Saved model to: %s.\n" % save_model_dir)
eval(ptb_model, test_data) eval(ptb_model, test_data)
eval(ptb_model, test_data) #eval(ptb_model, test_data)
train_ptb_lm() train_ptb_lm()
...@@ -8,6 +8,9 @@ MMoE是经典的多任务(multi-task)模型,原论文[Modeling Task Relati ...@@ -8,6 +8,9 @@ MMoE是经典的多任务(multi-task)模型,原论文[Modeling Task Relati
我们基于实际工业界场景实现了MMoE的核心思想。 我们基于实际工业界场景实现了MMoE的核心思想。
## 配置
paddle 1.6 及以上
## 数据 ## 数据
我们采用了随机数据作为训练数据,可以根据自己的数据调整data部分。 我们采用了随机数据作为训练数据,可以根据自己的数据调整data部分。
......
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import time
dict_dim = 1000 from args import *
emb_dim = 64
def fc_layers(input, layers, acts, prefix): def fc_layers(input, layers, acts, prefix):
...@@ -59,7 +58,7 @@ def mmoe_layer(inputs, expert_num=8, gate_num=3): ...@@ -59,7 +58,7 @@ def mmoe_layer(inputs, expert_num=8, gate_num=3):
return outs return outs
def model(): def model(dict_dim, emb_dim):
label_like = fluid.layers.data( label_like = fluid.layers.data(
name="label_like", name="label_like",
shape=[-1, 1], shape=[-1, 1],
...@@ -116,13 +115,18 @@ def model(): ...@@ -116,13 +115,18 @@ def model():
return cost, [a_data, label_like, label_comment, label_share] return cost, [a_data, label_like, label_comment, label_share]
batch_size = 5 args = parse_args()
batch_size = args.batch_size
dict_dim = args.dict_dim
emb_dim = args.emb_dim
print("batch_size:[%d], dict_dim:[%d], emb_dim:[%d], learning_rate:[%.4f]" %
(batch_size, dict_dim, emb_dim, args.base_lr))
loss, data_list = model() loss, data_list = model(dict_dim, emb_dim)
sgd = fluid.optimizer.SGD(learning_rate=0.001) sgd = fluid.optimizer.SGD(learning_rate=args.base_lr)
sgd.minimize(loss) sgd.minimize(loss)
use_cuda = True place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=data_list, place=place) feeder = fluid.DataFeeder(feed_list=data_list, place=place)
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
...@@ -131,6 +135,7 @@ for batch_id in range(100): ...@@ -131,6 +135,7 @@ for batch_id in range(100):
np.random.randint( np.random.randint(
2, size=(batch_size, 1)).astype('int64') for i in range(4) 2, size=(batch_size, 1)).astype('int64') for i in range(4)
] ]
begin = time.time()
loss_data, = exe.run(fluid.default_main_program(), loss_data, = exe.run(fluid.default_main_program(),
feed={ feed={
"a": data[0], "a": data[0],
...@@ -139,4 +144,6 @@ for batch_id in range(100): ...@@ -139,4 +144,6 @@ for batch_id in range(100):
"label_share": data[3] "label_share": data[3]
}, },
fetch_list=[loss.name]) fetch_list=[loss.name])
print(batch_id, " loss:", float(np.array(loss_data))) end = time.time()
print("batch_id:[%d], loss:[%.5f], batch_time:[%.5f s]" %
(batch_id, float(np.array(loss_data)), end - begin))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册