Unverified commit 5f187850, authored by: Z zhang wenhui, committed by: GitHub

Update 2.0 model (#4905)

* update api 1.8

* fix paddlerec readme

* update 2.0, test=develop
Parent 3fad507e
[156, 51, 24, 103, 195, 35, 188, 16, 224, 173, 116, 3, 226, 11, 64, 94, 6, 70, 197, 164, 220, 77, 172, 194, 227, 12, 65, 129, 39, 38, 75, 210, 215, 36, 46, 185, 76, 222, 108, 78, 120, 71, 33, 189, 135, 97, 90, 219, 105, 205, 136, 167, 106, 29, 157, 125, 217, 121, 175, 143, 200, 45, 179, 37, 86, 140, 225, 47, 20, 228, 4, 209, 177, 178, 171, 58, 48, 118, 9, 149, 55, 192, 82, 17, 43, 54, 93, 96, 159, 216, 18, 206, 223, 104, 132, 182, 60, 109, 28, 180, 44, 166, 128, 27, 163, 141, 229, 102, 150, 7, 83, 198, 41, 191, 114, 117, 122, 161, 130, 174, 176, 160, 201, 49, 112, 69, 165, 95, 133, 92, 59, 110, 151, 203, 67, 169, 21, 66, 80, 22, 23, 152, 40, 127, 111, 186, 72, 26, 190, 42, 0, 63, 53, 124, 137, 85, 126, 196, 187, 208, 98, 25, 15, 170, 193, 168, 202, 31, 146, 147, 113, 32, 204, 131, 68, 84, 213, 19, 81, 79, 162, 199, 107, 50, 2, 207, 10, 181, 144, 139, 134, 62, 155, 142, 214, 212, 61, 52, 101, 99, 158, 145, 13, 153, 56, 184, 221]
\ No newline at end of file
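# Download helper for the Criteo dataset: fetch and extract dac.tar.gz, pull the
# prebuilt feature dictionary feat_dict_10.pkl2 into aid_data/, run preprocess.py,
# and remove the intermediate raw_data directory once preprocessing is done.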
import os
import shutil
import sys
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(LOCAL_PATH, "..", "..", "tools")
sys.path.append(TOOLS_PATH)
from tools import download_file_and_uncompress, download_file
if __name__ == '__main__':
url = "https://s3-eu-west-1.amazonaws.com/kaggle-display-advertising-challenge-dataset/dac.tar.gz"
url2 = "https://paddlerec.bj.bcebos.com/deepfm%2Ffeat_dict_10.pkl2"
print("download and extract starting...")
download_file_and_uncompress(url)
if not os.path.exists("aid_data"):
os.makedirs("aid_data")
download_file(url2, "./aid_data/feat_dict_10.pkl2", True)
print("download and extract finished")
print("preprocessing...")
os.system("python preprocess.py")
print("preprocess done")
shutil.rmtree("raw_data")
print("done")
from __future__ import division
import os
import numpy
from collections import Counter
import shutil
import pickle
def get_raw_data(input_file, raw_data, ins_per_file):
if not os.path.isdir(raw_data):
os.mkdir(raw_data)
fin = open(input_file, 'r')
fout = open(os.path.join(raw_data, 'part-0'), 'w')
for line_idx, line in enumerate(fin):
if line_idx % ins_per_file == 0 and line_idx != 0:
fout.close()
cur_part_idx = int(line_idx / ins_per_file)
fout = open(
os.path.join(raw_data, 'part-' + str(cur_part_idx)), 'w')
fout.write(line)
fout.close()
fin.close()
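# split_data: randomly pick 90% of the part files for training and move the rest
# to the test directory. The chosen file indices are persisted to
# aid_data/train_file_idx.txt so that re-running the script reproduces the split.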
def split_data(raw_data, aid_data, train_data, test_data):
split_rate_ = 0.9
dir_train_file_idx_ = os.path.join(aid_data, 'train_file_idx.txt')
filelist_ = [
os.path.join(raw_data, 'part-%d' % x)
for x in range(len(os.listdir(raw_data)))
]
if not os.path.exists(dir_train_file_idx_):
train_file_idx = list(
numpy.random.choice(
len(filelist_), int(len(filelist_) * split_rate_), False))
with open(dir_train_file_idx_, 'w') as fout:
fout.write(str(train_file_idx))
else:
with open(dir_train_file_idx_, 'r') as fin:
train_file_idx = eval(fin.read())
for idx in range(len(filelist_)):
if idx in train_file_idx:
shutil.move(filelist_[idx], train_data)
else:
shutil.move(filelist_[idx], test_data)
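# get_feat_dict: each Criteo line is tab-separated as <label> followed by 13
# continuous fields and 26 categorical fields. Continuous features keep the fixed
# ids 1..13; every categorical value seen at least freq_ (10) times receives its
# own id. The resulting dict is pickled to aid_data/feat_dict_10.pkl2.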
def get_feat_dict(input_file, aid_data, print_freq=100000, total_ins=45000000):
freq_ = 10
dir_feat_dict_ = os.path.join(aid_data, 'feat_dict_' + str(freq_) + '.pkl2')
continuous_range_ = range(1, 14)
categorical_range_ = range(14, 40)
if not os.path.exists(dir_feat_dict_):
# print('generate a feature dict')
# Count the number of occurrences of discrete features
feat_cnt = Counter()
with open(input_file, 'r') as fin:
for line_idx, line in enumerate(fin):
if line_idx % print_freq == 0:
print(r'generating feature dict {:.2f} %'.format((
line_idx / total_ins) * 100))
features = line.rstrip('\n').split('\t')
for idx in categorical_range_:
if features[idx] == '': continue
feat_cnt.update([features[idx]])
# Only retain discrete features with high frequency
dis_feat_set = set()
for feat, ot in feat_cnt.items():
if ot >= freq_:
dis_feat_set.add(feat)
# Create a dictionary for continuous and discrete features
feat_dict = {}
tc = 1
# Continuous features
for idx in continuous_range_:
feat_dict[idx] = tc
tc += 1
for feat in dis_feat_set:
feat_dict[feat] = tc
tc += 1
# Save dictionary
with open(dir_feat_dict_, 'wb') as fout:
pickle.dump(feat_dict, fout, protocol=2)
print('args.num_feat ', len(feat_dict) + 1)
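# preprocess: runs the three steps above. outdir must already exist; ins_per_file
# sets the shard size and print_freq defaults to 10 * ins_per_file. A minimal
# sketch of an alternative invocation on a smaller sample (the file name and
# counts here are illustrative assumptions, not files shipped with this repo):
#
#   preprocess('train_sample.txt', './', ins_per_file=10000, total_ins=1000000)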
def preprocess(input_file,
outdir,
ins_per_file,
total_ins=None,
print_freq=None):
train_data = os.path.join(outdir, "train_data")
test_data = os.path.join(outdir, "test_data")
aid_data = os.path.join(outdir, "aid_data")
raw_data = os.path.join(outdir, "raw_data")
if not os.path.isdir(train_data):
os.mkdir(train_data)
if not os.path.isdir(test_data):
os.mkdir(test_data)
if not os.path.isdir(aid_data):
os.mkdir(aid_data)
if print_freq is None:
print_freq = 10 * ins_per_file
get_raw_data(input_file, raw_data, ins_per_file)
split_data(raw_data, aid_data, train_data, test_data)
get_feat_dict(input_file, aid_data, print_freq, total_ins)
print('Done!')
if __name__ == '__main__':
preprocess('train.txt', './', 200000, 45000000)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import unittest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.optimizer import AdagradOptimizer
from paddle.fluid.dygraph.base import to_variable
import numpy as np
import six
import reader
import model_check
import time
from args import *
import sys
if sys.version[0] == '2':
reload(sys)
sys.setdefaultencoding("utf-8")
class SimpleGRURNN(fluid.Layer):
def __init__(self,
hidden_size,
num_steps,
num_layers=2,
init_scale=0.1,
dropout=None):
super(SimpleGRURNN, self).__init__()
self._hidden_size = hidden_size
self._num_layers = num_layers
self._init_scale = init_scale
self._dropout = dropout
self._num_steps = num_steps
self.weight_1_arr = []
self.weight_2_arr = []
self.weight_3_arr = []
self.bias_1_arr = []
self.bias_2_arr = []
self.mask_array = []
for i in range(self._num_layers):
weight_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 2, self._hidden_size * 2],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_1_arr.append(self.add_parameter('w1_%d' % i, weight_1))
weight_2 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size, self._hidden_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_2_arr.append(self.add_parameter('w2_%d' % i, weight_2))
weight_3 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size, self._hidden_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_3_arr.append(self.add_parameter('w3_%d' % i, weight_3))
bias_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 2],
dtype="float32",
default_initializer=fluid.initializer.Constant(0.0))
self.bias_1_arr.append(self.add_parameter('b1_%d' % i, bias_1))
bias_2 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 1],
dtype="float32",
default_initializer=fluid.initializer.Constant(0.0))
self.bias_2_arr.append(self.add_parameter('b2_%d' % i, bias_2))
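# forward runs a stacked GRU cell step by step. For each layer and time step:
#   u, r = split(concat([x_t, h_prev]) . W1 + b1)              # update / reset gates
#   c_t  = tanh(x_t . W2 + (sigmoid(r) * h_prev) . W3 + b2)    # candidate state
#   h_t  = sigmoid(u) * h_prev + (1 - sigmoid(u)) * c_t
# Per-step outputs are concatenated into [batch, num_steps, hidden_size] and the
# final hidden states are returned as [num_layers, batch, hidden_size].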
def forward(self, input_embedding, init_hidden=None):
hidden_array = []
for i in range(self._num_layers):
hidden_array.append(init_hidden[i])
res = []
for index in range(self._num_steps):
step_input = input_embedding[:, index, :]
for k in range(self._num_layers):
pre_hidden = hidden_array[k]
weight_1 = self.weight_1_arr[k]
weight_2 = self.weight_2_arr[k]
weight_3 = self.weight_3_arr[k]
bias_1 = self.bias_1_arr[k]
bias_2 = self.bias_2_arr[k]
nn = fluid.layers.concat([step_input, pre_hidden], 1)
gate_input = fluid.layers.matmul(x=nn, y=weight_1)
gate_input = fluid.layers.elementwise_add(gate_input, bias_1)
u, r = fluid.layers.split(gate_input, num_or_sections=2, dim=-1)
hidden_c = fluid.layers.tanh(
fluid.layers.elementwise_add(
fluid.layers.matmul(
x=step_input, y=weight_2) + fluid.layers.matmul(
x=(fluid.layers.sigmoid(r) * pre_hidden),
y=weight_3),
bias_2))
hidden_state = fluid.layers.sigmoid(u) * pre_hidden + (
1.0 - fluid.layers.sigmoid(u)) * hidden_c
hidden_array[k] = hidden_state
step_input = hidden_state
if self._dropout is not None and self._dropout > 0.0:
step_input = fluid.layers.dropout(
step_input,
dropout_prob=self._dropout,
dropout_implementation='upscale_in_train')
res.append(step_input)
real_res = fluid.layers.concat(res, 1)
real_res = fluid.layers.reshape(
real_res, [-1, self._num_steps, self._hidden_size])
last_hidden = fluid.layers.concat(hidden_array, 1)
last_hidden = fluid.layers.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size])
last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
return real_res, last_hidden
class PtbModel(fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
vocab_size,
num_layers=2,
num_steps=20,
init_scale=0.1,
dropout=None):
#super(PtbModel, self).__init__(name_scope)
super(PtbModel, self).__init__()
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.init_scale = init_scale
self.num_layers = num_layers
self.num_steps = num_steps
self.dropout = dropout
self.simple_gru_rnn = SimpleGRURNN(
#self.full_name(),
hidden_size,
num_steps,
num_layers=num_layers,
init_scale=init_scale,
dropout=dropout)
self.embedding = Embedding(
#self.full_name(),
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
param_attr=fluid.ParamAttr(
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale)))
self.softmax_weight = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
self.softmax_bias = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
def build_once(self, input, label, init_hidden):
pass
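# forward: embed the input ids, optionally apply dropout, run the GRU stack,
# project onto the vocabulary, then compute softmax cross-entropy summed over the
# num_steps positions plus top-20 accuracy (reported as recall@20 during eval).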
def forward(self, input, label, init_hidden):
init_h = fluid.layers.reshape(
init_hidden, shape=[self.num_layers, -1, self.hidden_size])
x_emb = self.embedding(input)
x_emb = fluid.layers.reshape(
x_emb, shape=[-1, self.num_steps, self.hidden_size])
if self.dropout is not None and self.dropout > 0.0:
x_emb = fluid.layers.dropout(
x_emb,
dropout_prob=self.dropout,
dropout_implementation='upscale_in_train')
rnn_out, last_hidden = self.simple_gru_rnn(x_emb, init_h)
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
loss = fluid.layers.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False)
pre_2d = fluid.layers.reshape(projection, shape=[-1, self.vocab_size])
label_2d = fluid.layers.reshape(label, shape=[-1, 1])
acc = fluid.layers.accuracy(input=pre_2d, label=label_2d, k=20)
loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
loss = fluid.layers.reduce_mean(loss, dim=[0])
loss = fluid.layers.reduce_sum(loss)
return loss, last_hidden, acc
def debug_emb(self):
np.save("emb_grad", self.x_emb.gradient())
def train_ptb_lm():
args = parse_args()
# check whether use_gpu=True was set with a CPU-only PaddlePaddle build
model_check.check_cuda(args.use_gpu)
# check if paddlepaddle version is satisfied
model_check.check_version()
model_type = args.model_type
vocab_size = 37484
if model_type == "gru4rec":
num_layers = 1
batch_size = 500
hidden_size = 100
num_steps = 10
init_scale = 0.1
max_grad_norm = 5.0
epoch_start_decay = 10
max_epoch = 5
dropout = 0.0
lr_decay = 0.5
base_learning_rate = 0.05
else:
print("model type not support")
return
with fluid.dygraph.guard(core.CUDAPlace(0)):
if args.ce:
print("ce mode")
seed = 33
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
max_epoch = 1
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
num_steps=num_steps,
init_scale=init_scale,
dropout=dropout)
if args.init_from_pretrain_model:
if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
print(args.init_from_pretrain_model)
raise Warning("The pretrained params do not exist.")
return
para_dict, _ = fluid.load_dygraph(args.init_from_pretrain_model)
ptb_model.set_dict(para_dict)
print("finished initializing model from pretrained params from %s" %
(args.init_from_pretrain_model))
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
last_hidden = None
data_path = args.data_path
print("begin to load data")
ptb_data = reader.get_ptb_data(data_path)
print("finished load data")
train_data, valid_data, test_data = ptb_data
batch_len = len(train_data) // batch_size
total_batch_size = (batch_len - 1) // num_steps
print("total_batch_size:", total_batch_size)
log_interval = total_batch_size // 20
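# Build a piecewise learning-rate schedule: boundaries are global batch counts at
# each epoch end, and the rate is halved (lr_decay) for every epoch beyond
# epoch_start_decay. With gru4rec's max_epoch=5 and epoch_start_decay=10 the rate
# effectively stays at base_learning_rate.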
bd = []
lr_arr = [base_learning_rate]
for i in range(1, max_epoch):
bd.append(total_batch_size * i)
new_lr = base_learning_rate * (lr_decay**
max(i + 1 - epoch_start_decay, 0.0))
lr_arr.append(new_lr)
grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
sgd = AdagradOptimizer(
parameter_list=ptb_model.parameters(),
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
grad_clip=grad_clip)
print("parameters:--------------------------------")
for para in ptb_model.parameters():
print(para.name)
print("parameters:--------------------------------")
def eval(model, data):
print("begion to eval")
total_loss = 0.0
iters = 0.0
init_hidden_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
model.eval()
train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
init_hidden = to_variable(init_hidden_data)
accum_num_recall = 0.0
for batch_id, batch in enumerate(train_data_iter):
x_data, y_data = batch
x_data = x_data.reshape((-1, num_steps, 1))
y_data = y_data.reshape((-1, num_steps, 1))
x = to_variable(x_data)
y = to_variable(y_data)
dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)
out_loss = dy_loss.numpy()
acc_ = acc.numpy()[0]
accum_num_recall += acc_
if batch_id % 1 == 0:
print("batch_id:%d recall@20:%.4f" %
(batch_id, accum_num_recall / (batch_id + 1)))
init_hidden = last_hidden
total_loss += out_loss
iters += num_steps
print("eval finished")
ppl = np.exp(total_loss / iters)
print("recall@20 ", accum_num_recall / (batch_id + 1))
if args.ce:
print("kpis\ttest_ppl\t%0.3f" % ppl[0])
for epoch_id in range(max_epoch):
ptb_model.train()
total_loss = 0.0
iters = 0.0
init_hidden_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
train_data_iter = reader.get_data_iter(train_data, batch_size,
num_steps)
init_hidden = to_variable(init_hidden_data)
start_time = time.time()
for batch_id, batch in enumerate(train_data_iter):
x_data, y_data = batch
x_data = x_data.reshape((-1, num_steps, 1))
y_data = y_data.reshape((-1, num_steps, 1))
x = to_variable(x_data)
y = to_variable(y_data)
dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)
out_loss = dy_loss.numpy()
acc_ = acc.numpy()[0]
init_hidden = last_hidden.detach()
dy_loss.backward()
sgd.minimize(dy_loss)
ptb_model.clear_gradients()
total_loss += out_loss
iters += num_steps
if batch_id > 0 and batch_id % 100 == 1:
ppl = np.exp(total_loss / iters)
print(
"-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, acc: %.5f, lr: %.5f"
% (epoch_id, batch_id, ppl[0], acc_,
sgd._global_learning_rate().numpy()))
print("one ecpoh finished", epoch_id)
print("time cost ", time.time() - start_time)
ppl = np.exp(total_loss / iters)
print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))
if args.ce:
print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
save_model_dir = os.path.join(args.save_model_dir,
str(epoch_id), 'params')
fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
print("Saved model to: %s.\n" % save_model_dir)
eval(ptb_model, test_data)
#eval(ptb_model, test_data)
train_ptb_lm()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import unittest
import paddle
import numpy as np
import six
import reader
import model_check
import time
from args import *
import sys
if sys.version[0] == '2':
reload(sys)
sys.setdefaultencoding("utf-8")
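# This file ports the fluid-based gru4rec trainer above to the paddle 2.x API:
# paddle.disable_static replaces the dygraph guard, parameters use
# paddle.nn.initializer.Uniform, tensor ops come from the paddle.* namespace, and
# the optimizer becomes paddle.optimizer.Adagrad with paddle.nn.ClipGradByGlobalNorm.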
class SimpleGRURNN(paddle.fluid.Layer):
def __init__(self,
hidden_size,
num_steps,
num_layers=2,
init_scale=0.1,
dropout=None):
super(SimpleGRURNN, self).__init__()
self._hidden_size = hidden_size
self._num_layers = num_layers
self._init_scale = init_scale
self._dropout = dropout
self._num_steps = num_steps
self.weight_1_arr = []
self.weight_2_arr = []
self.weight_3_arr = []
self.bias_1_arr = []
self.bias_2_arr = []
self.mask_array = []
for i in range(self._num_layers):
weight_1 = self.create_parameter(
attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 2, self._hidden_size * 2],
dtype="float32",
default_initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale))
self.weight_1_arr.append(self.add_parameter('w1_%d' % i, weight_1))
weight_2 = self.create_parameter(
attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size, self._hidden_size],
dtype="float32",
default_initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale))
self.weight_2_arr.append(self.add_parameter('w2_%d' % i, weight_2))
weight_3 = self.create_parameter(
attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size, self._hidden_size],
dtype="float32",
default_initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale))
self.weight_3_arr.append(self.add_parameter('w3_%d' % i, weight_3))
bias_1 = self.create_parameter(
attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 2],
dtype="float32",
default_initializer=paddle.nn.initializer.Constant(0.0))
self.bias_1_arr.append(self.add_parameter('b1_%d' % i, bias_1))
bias_2 = self.create_parameter(
attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
low=-self._init_scale, high=self._init_scale)),
shape=[self._hidden_size * 1],
dtype="float32",
default_initializer=paddle.nn.initializer.Constant(0.0))
self.bias_2_arr.append(self.add_parameter('b2_%d' % i, bias_2))
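# Same stacked GRU recurrence as the fluid version above (update/reset gates from
# W1/b1, candidate state from W2/W3/b2), expressed with the paddle 2.x tensor APIs
# (paddle.concat / paddle.matmul / paddle.add / paddle.split).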
def forward(self, input_embedding, init_hidden=None):
hidden_array = []
for i in range(self._num_layers):
hidden_array.append(init_hidden[i])
res = []
for index in range(self._num_steps):
step_input = input_embedding[:, index, :]
for k in range(self._num_layers):
pre_hidden = hidden_array[k]
weight_1 = self.weight_1_arr[k]
weight_2 = self.weight_2_arr[k]
weight_3 = self.weight_3_arr[k]
bias_1 = self.bias_1_arr[k]
bias_2 = self.bias_2_arr[k]
nn = paddle.concat(x=[step_input, pre_hidden], axis=1)
gate_input = paddle.matmul(x=nn, y=weight_1)
gate_input = paddle.add(x=gate_input, y=bias_1)
u, r = paddle.split(x=gate_input, num_or_sections=2, axis=-1)
hidden_c = paddle.tanh(
paddle.add(x=paddle.matmul(
x=step_input, y=weight_2) + paddle.matmul(
x=(paddle.nn.functional.sigmoid(r) * pre_hidden),
y=weight_3),
y=bias_2))
hidden_state = paddle.nn.functional.sigmoid(u) * pre_hidden + (
1.0 - paddle.nn.functional.sigmoid(u)) * hidden_c
hidden_array[k] = hidden_state
step_input = hidden_state
if self._dropout is not None and self._dropout > 0.0:
step_input = paddle.fluid.layers.dropout(
step_input,
dropout_prob=self._dropout,
dropout_implementation='upscale_in_train')
res.append(step_input)
real_res = paddle.concat(x=res, axis=1)
real_res = paddle.fluid.layers.reshape(
real_res, [-1, self._num_steps, self._hidden_size])
last_hidden = paddle.concat(x=hidden_array, axis=1)
last_hidden = paddle.fluid.layers.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size])
last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
return real_res, last_hidden
class PtbModel(paddle.fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
vocab_size,
num_layers=2,
num_steps=20,
init_scale=0.1,
dropout=None):
#super(PtbModel, self).__init__(name_scope)
super(PtbModel, self).__init__()
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.init_scale = init_scale
self.num_layers = num_layers
self.num_steps = num_steps
self.dropout = dropout
self.simple_gru_rnn = SimpleGRURNN(
#self.full_name(),
hidden_size,
num_steps,
num_layers=num_layers,
init_scale=init_scale,
dropout=dropout)
self.embedding = paddle.fluid.dygraph.nn.Embedding(
#self.full_name(),
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
param_attr=paddle.ParamAttr(
name='embedding_para',
initializer=paddle.nn.initializer.Uniform(
low=-init_scale, high=init_scale)))
self.softmax_weight = self.create_parameter(
attr=paddle.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale))
self.softmax_bias = self.create_parameter(
attr=paddle.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
default_initializer=paddle.nn.initializer.Uniform(
low=-self.init_scale, high=self.init_scale))
def build_once(self, input, label, init_hidden):
pass
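# forward: same flow as the fluid model above: embedding -> optional dropout ->
# GRU stack -> vocabulary projection -> softmax cross-entropy summed over
# num_steps, plus top-20 accuracy (recall@20), built from paddle 2.x ops.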
def forward(self, input, label, init_hidden):
init_h = paddle.fluid.layers.reshape(
init_hidden, shape=[self.num_layers, -1, self.hidden_size])
x_emb = self.embedding(input)
x_emb = paddle.fluid.layers.reshape(
x_emb, shape=[-1, self.num_steps, self.hidden_size])
if self.dropout is not None and self.dropout > 0.0:
x_emb = paddle.fluid.layers.dropout(
x_emb,
dropout_prob=self.dropout,
dropout_implementation='upscale_in_train')
rnn_out, last_hidden = self.simple_gru_rnn(x_emb, init_h)
projection = paddle.matmul(x=rnn_out, y=self.softmax_weight)
projection = paddle.add(x=projection, y=self.softmax_bias)
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False)
pre_2d = paddle.fluid.layers.reshape(
projection, shape=[-1, self.vocab_size])
label_2d = paddle.fluid.layers.reshape(label, shape=[-1, 1])
acc = paddle.metric.accuracy(input=pre_2d, label=label_2d, k=20)
loss = paddle.fluid.layers.reshape(loss, shape=[-1, self.num_steps])
loss = paddle.reduce_mean(loss, dim=[0])
loss = paddle.reduce_sum(loss)
return loss, last_hidden, acc
def debug_emb(self):
np.save("emb_grad", self.x_emb.gradient())
def train_ptb_lm():
args = parse_args()
# check whether use_gpu=True was set with a CPU-only PaddlePaddle build
model_check.check_cuda(args.use_gpu)
# check if paddlepaddle version is satisfied
model_check.check_version()
model_type = args.model_type
vocab_size = 37484
if model_type == "gru4rec":
num_layers = 1
batch_size = 500
hidden_size = 100
num_steps = 10
init_scale = 0.1
max_grad_norm = 5.0
epoch_start_decay = 10
max_epoch = 5
dropout = 0.0
lr_decay = 0.5
base_learning_rate = 0.05
else:
print("model type not support")
return
paddle.disable_static(paddle.fluid.core.CUDAPlace(0))
if args.ce:
print("ce mode")
seed = 33
np.random.seed(seed)
paddle.static.default_startup_program().random_seed = seed
paddle.static.default_main_program().random_seed = seed
max_epoch = 1
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
num_steps=num_steps,
init_scale=init_scale,
dropout=dropout)
if args.init_from_pretrain_model:
if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
print(args.init_from_pretrain_model)
raise Warning("The pretrained params do not exist.")
return
para_dict, _ = paddle.fluid.load_dygraph(args.init_from_pretrain_model)
ptb_model.set_dict(para_dict)
print("finished initializing model from pretrained params from %s" %
(args.init_from_pretrain_model))
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
last_hidden = None
data_path = args.data_path
print("begin to load data")
ptb_data = reader.get_ptb_data(data_path)
print("finished load data")
train_data, valid_data, test_data = ptb_data
batch_len = len(train_data) // batch_size
total_batch_size = (batch_len - 1) // num_steps
print("total_batch_size:", total_batch_size)
log_interval = total_batch_size // 20
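# bd / lr_arr mirror the piecewise schedule from the fluid version, but in this
# 2.0 port the optimizer below is created with a constant base_learning_rate and
# the piecewise decay is left commented out.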
bd = []
lr_arr = [base_learning_rate]
for i in range(1, max_epoch):
bd.append(total_batch_size * i)
new_lr = base_learning_rate * (lr_decay
**max(i + 1 - epoch_start_decay, 0.0))
lr_arr.append(new_lr)
grad_clip = paddle.nn.ClipGradByGlobalNorm(max_grad_norm)
sgd = paddle.optimizer.Adagrad(
parameters=ptb_model.parameters(),
learning_rate=base_learning_rate,
#learning_rate=paddle.fluid.layers.piecewise_decay(
# boundaries=bd, values=lr_arr),
grad_clip=grad_clip)
print("parameters:--------------------------------")
for para in ptb_model.parameters():
print(para.name)
print("parameters:--------------------------------")
def eval(model, data):
print("begion to eval")
total_loss = 0.0
iters = 0.0
init_hidden_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
model.eval()
train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
init_hidden = paddle.to_tensor(
data=init_hidden_data, dtype=None, place=None, stop_gradient=True)
accum_num_recall = 0.0
for batch_id, batch in enumerate(train_data_iter):
x_data, y_data = batch
x_data = x_data.reshape((-1, num_steps, 1))
y_data = y_data.reshape((-1, num_steps, 1))
x = paddle.to_tensor(
data=x_data, dtype=None, place=None, stop_gradient=True)
y = paddle.to_tensor(
data=y_data, dtype=None, place=None, stop_gradient=True)
dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)
out_loss = dy_loss.numpy()
acc_ = acc.numpy()[0]
accum_num_recall += acc_
if batch_id % 1 == 0:
print("batch_id:%d recall@20:%.4f" %
(batch_id, accum_num_recall / (batch_id + 1)))
init_hidden = last_hidden
total_loss += out_loss
iters += num_steps
print("eval finished")
ppl = np.exp(total_loss / iters)
print("recall@20 ", accum_num_recall / (batch_id + 1))
if args.ce:
print("kpis\ttest_ppl\t%0.3f" % ppl[0])
for epoch_id in range(max_epoch):
ptb_model.train()
total_loss = 0.0
iters = 0.0
init_hidden_data = np.zeros(
(num_layers, batch_size, hidden_size), dtype='float32')
train_data_iter = reader.get_data_iter(train_data, batch_size,
num_steps)
init_hidden = paddle.to_tensor(
data=init_hidden_data, dtype=None, place=None, stop_gradient=True)
start_time = time.time()
for batch_id, batch in enumerate(train_data_iter):
x_data, y_data = batch
x_data = x_data.reshape((-1, num_steps, 1))
y_data = y_data.reshape((-1, num_steps, 1))
x = paddle.to_tensor(
data=x_data, dtype=None, place=None, stop_gradient=True)
y = paddle.to_tensor(
data=y_data, dtype=None, place=None, stop_gradient=True)
dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)
out_loss = dy_loss.numpy()
acc_ = acc.numpy()[0]
init_hidden = last_hidden.detach()
dy_loss.backward()
sgd.minimize(dy_loss)
ptb_model.clear_gradients()
total_loss += out_loss
iters += num_steps
if batch_id > 0 and batch_id % 100 == 1:
ppl = np.exp(total_loss / iters)
print(
"-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, acc: %.5f, lr: %.5f"
% (epoch_id, batch_id, ppl[0], acc_,
sgd._global_learning_rate().numpy()))
print("one ecpoh finished", epoch_id)
print("time cost ", time.time() - start_time)
ppl = np.exp(total_loss / iters)
print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))
if args.ce:
print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
save_model_dir = os.path.join(args.save_model_dir,
str(epoch_id), 'params')
paddle.fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
print("Saved model to: %s.\n" % save_model_dir)
eval(ptb_model, test_data)
paddle.enable_static()
#eval(ptb_model, test_data)
train_ptb_lm()