Unverified commit 5f187850, authored by zhang wenhui, committed by GitHub

Update 2.0 model (#4905)

* update api 1.8

* fix paddlerec readme

* update 2.0, test=develop
Parent 3fad507e
[156, 51, 24, 103, 195, 35, 188, 16, 224, 173, 116, 3, 226, 11, 64, 94, 6, 70, 197, 164, 220, 77, 172, 194, 227, 12, 65, 129, 39, 38, 75, 210, 215, 36, 46, 185, 76, 222, 108, 78, 120, 71, 33, 189, 135, 97, 90, 219, 105, 205, 136, 167, 106, 29, 157, 125, 217, 121, 175, 143, 200, 45, 179, 37, 86, 140, 225, 47, 20, 228, 4, 209, 177, 178, 171, 58, 48, 118, 9, 149, 55, 192, 82, 17, 43, 54, 93, 96, 159, 216, 18, 206, 223, 104, 132, 182, 60, 109, 28, 180, 44, 166, 128, 27, 163, 141, 229, 102, 150, 7, 83, 198, 41, 191, 114, 117, 122, 161, 130, 174, 176, 160, 201, 49, 112, 69, 165, 95, 133, 92, 59, 110, 151, 203, 67, 169, 21, 66, 80, 22, 23, 152, 40, 127, 111, 186, 72, 26, 190, 42, 0, 63, 53, 124, 137, 85, 126, 196, 187, 208, 98, 25, 15, 170, 193, 168, 202, 31, 146, 147, 113, 32, 204, 131, 68, 84, 213, 19, 81, 79, 162, 199, 107, 50, 2, 207, 10, 181, 144, 139, 134, 62, 155, 142, 214, 212, 61, 52, 101, 99, 158, 145, 13, 153, 56, 184, 221]
\ No newline at end of file
New download script for the Criteo dataset and the prebuilt DeepFM feature dictionary:

import os
import shutil
import sys

# Make the shared PaddleRec download helpers importable.
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(LOCAL_PATH, "..", "..", "tools")
sys.path.append(TOOLS_PATH)

from tools import download_file_and_uncompress, download_file

if __name__ == '__main__':
    url = "https://s3-eu-west-1.amazonaws.com/kaggle-display-advertising-challenge-dataset/dac.tar.gz"
    url2 = "https://paddlerec.bj.bcebos.com/deepfm%2Ffeat_dict_10.pkl2"

    print("download and extract starting...")
    download_file_and_uncompress(url)
    if not os.path.exists("aid_data"):
        os.makedirs("aid_data")
    download_file(url2, "./aid_data/feat_dict_10.pkl2", True)
    print("download and extract finished")

    print("preprocessing...")
    os.system("python preprocess.py")
    print("preprocess done")

    # The sharded raw_data dir is no longer needed once train/test are split.
    shutil.rmtree("raw_data")
    print("done")
The accompanying preprocess.py shards the raw Criteo file, splits train/test, and builds the feature dictionary:

from __future__ import division
import os
import numpy
from collections import Counter
import shutil
import pickle


def get_raw_data(input_file, raw_data, ins_per_file):
    # Shard the raw training file into part-N files of ins_per_file lines each.
    if not os.path.isdir(raw_data):
        os.mkdir(raw_data)

    fin = open(input_file, 'r')
    fout = open(os.path.join(raw_data, 'part-0'), 'w')
    for line_idx, line in enumerate(fin):
        if line_idx % ins_per_file == 0 and line_idx != 0:
            fout.close()
            cur_part_idx = int(line_idx / ins_per_file)
            fout = open(
                os.path.join(raw_data, 'part-' + str(cur_part_idx)), 'w')
        fout.write(line)
    fout.close()
    fin.close()
def split_data(raw_data, aid_data, train_data, test_data):
    split_rate_ = 0.9
    dir_train_file_idx_ = os.path.join(aid_data, 'train_file_idx.txt')
    filelist_ = [
        os.path.join(raw_data, 'part-%d' % x)
        for x in range(len(os.listdir(raw_data)))
    ]

    # Persist the random 90/10 shard split (the committed index list above is
    # evidently this file), so reruns reuse the same train/test partition.
    if not os.path.exists(dir_train_file_idx_):
        train_file_idx = list(
            numpy.random.choice(
                len(filelist_), int(len(filelist_) * split_rate_), False))
        with open(dir_train_file_idx_, 'w') as fout:
            fout.write(str(train_file_idx))
    else:
        with open(dir_train_file_idx_, 'r') as fin:
            train_file_idx = eval(fin.read())

    for idx in range(len(filelist_)):
        if idx in train_file_idx:
            shutil.move(filelist_[idx], train_data)
        else:
            shutil.move(filelist_[idx], test_data)
def get_feat_dict(input_file, aid_data, print_freq=100000, total_ins=45000000):
    freq_ = 10
    dir_feat_dict_ = os.path.join(aid_data, 'feat_dict_' + str(freq_) + '.pkl2')
    continuous_range_ = range(1, 14)
    categorical_range_ = range(14, 40)

    if not os.path.exists(dir_feat_dict_):
        # Count the number of occurrences of each discrete feature value.
        feat_cnt = Counter()
        with open(input_file, 'r') as fin:
            for line_idx, line in enumerate(fin):
                if line_idx % print_freq == 0:
                    print(r'generating feature dict {:.2f} %'.format((
                        line_idx / total_ins) * 100))
                features = line.rstrip('\n').split('\t')
                for idx in categorical_range_:
                    if features[idx] == '': continue
                    feat_cnt.update([features[idx]])

        # Only retain discrete feature values with high frequency.
        dis_feat_set = set()
        for feat, ot in feat_cnt.items():
            if ot >= freq_:
                dis_feat_set.add(feat)

        # Create a dictionary for continuous and discrete features.
        feat_dict = {}
        tc = 1
        # Continuous features are keyed by column index.
        for idx in continuous_range_:
            feat_dict[idx] = tc
            tc += 1
        # Discrete features are keyed by raw value.
        for feat in dis_feat_set:
            feat_dict[feat] = tc
            tc += 1

        # Save the dictionary.
        with open(dir_feat_dict_, 'wb') as fout:
            pickle.dump(feat_dict, fout, protocol=2)
        print('args.num_feat ', len(feat_dict) + 1)
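
The saved dictionary maps each of the 13 continuous-feature column indices (1-13) and each frequent categorical value to a unique integer id. A sketch of how a reader could consume it to turn one raw Criteo line into (feature id, value) pairs -- the actual reader is not part of this diff, so the names here are illustrative:

# Illustrative only: applying feat_dict_10.pkl2 to one line of train.txt
# (column 0 is the label, 1-13 are continuous, 14-39 are categorical).
import pickle

with open('aid_data/feat_dict_10.pkl2', 'rb') as f:
    feat_dict = pickle.load(f)

def featurize(line):
    features = line.rstrip('\n').split('\t')
    feat_ids, feat_vals = [], []
    for idx in range(1, 14):            # continuous: id keyed by column index
        if features[idx] != '':
            feat_ids.append(feat_dict[idx])
            feat_vals.append(float(features[idx]))
    for idx in range(14, 40):           # categorical: id keyed by raw value
        if features[idx] in feat_dict:  # rare values (< freq_) were dropped
            feat_ids.append(feat_dict[features[idx]])
            feat_vals.append(1.0)
    return feat_ids, feat_vals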
def preprocess(input_file,
               outdir,
               ins_per_file,
               total_ins=None,
               print_freq=None):
    train_data = os.path.join(outdir, "train_data")
    test_data = os.path.join(outdir, "test_data")
    aid_data = os.path.join(outdir, "aid_data")
    raw_data = os.path.join(outdir, "raw_data")
    if not os.path.isdir(train_data):
        os.mkdir(train_data)
    if not os.path.isdir(test_data):
        os.mkdir(test_data)
    if not os.path.isdir(aid_data):
        os.mkdir(aid_data)
    if print_freq is None:
        print_freq = 10 * ins_per_file

    get_raw_data(input_file, raw_data, ins_per_file)
    split_data(raw_data, aid_data, train_data, test_data)
    get_feat_dict(input_file, aid_data, print_freq, total_ins)

    print('Done!')


if __name__ == '__main__':
    preprocess('train.txt', './', 200000, 45000000)
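
After a full run, the working directory should contain the following (layout inferred from the os.path.join calls above; raw_data is removed afterwards by the download script):

train_data/part-*            # ~90% of the shards
test_data/part-*             # the remaining ~10%
aid_data/train_file_idx.txt  # the persisted shard split
aid_data/feat_dict_10.pkl2   # the feature dictionary
raw_data/                    # intermediate shards, deleted after preprocessing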
The remaining hunks migrate the dygraph GRU model from the fluid API to paddle 2.0-style calls:

@@ -16,20 +16,13 @@ from __future__ import print_function

 import os
 import unittest
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-from paddle.fluid.dygraph.nn import Embedding
-import paddle.fluid.framework as framework
-from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.optimizer import AdagradOptimizer
-from paddle.fluid.dygraph.base import to_variable
+import paddle
 import numpy as np
 import six

 import reader
 import model_check
 import time

 from args import *
 import sys
@@ -38,7 +31,7 @@ if sys.version[0] == '2':
     sys.setdefaultencoding("utf-8")


-class SimpleGRURNN(fluid.Layer):
+class SimpleGRURNN(paddle.fluid.Layer):
     def __init__(self,
                  hidden_size,
                  num_steps,
@@ -61,47 +54,42 @@ class SimpleGRURNN(fluid.Layer):
         for i in range(self._num_layers):
             weight_1 = self.create_parameter(
-                attr=fluid.ParamAttr(
-                    initializer=fluid.initializer.UniformInitializer(
-                        low=-self._init_scale, high=self._init_scale)),
+                attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
+                    low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size * 2, self._hidden_size * 2],
                 dtype="float32",
-                default_initializer=fluid.initializer.UniformInitializer(
+                default_initializer=paddle.nn.initializer.Uniform(
                     low=-self._init_scale, high=self._init_scale))
             self.weight_1_arr.append(self.add_parameter('w1_%d' % i, weight_1))
             weight_2 = self.create_parameter(
-                attr=fluid.ParamAttr(
-                    initializer=fluid.initializer.UniformInitializer(
-                        low=-self._init_scale, high=self._init_scale)),
+                attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
+                    low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size, self._hidden_size],
                 dtype="float32",
-                default_initializer=fluid.initializer.UniformInitializer(
+                default_initializer=paddle.nn.initializer.Uniform(
                     low=-self._init_scale, high=self._init_scale))
             self.weight_2_arr.append(self.add_parameter('w2_%d' % i, weight_2))
             weight_3 = self.create_parameter(
-                attr=fluid.ParamAttr(
-                    initializer=fluid.initializer.UniformInitializer(
-                        low=-self._init_scale, high=self._init_scale)),
+                attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
+                    low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size, self._hidden_size],
                 dtype="float32",
-                default_initializer=fluid.initializer.UniformInitializer(
+                default_initializer=paddle.nn.initializer.Uniform(
                     low=-self._init_scale, high=self._init_scale))
             self.weight_3_arr.append(self.add_parameter('w3_%d' % i, weight_3))
             bias_1 = self.create_parameter(
-                attr=fluid.ParamAttr(
-                    initializer=fluid.initializer.UniformInitializer(
-                        low=-self._init_scale, high=self._init_scale)),
+                attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
+                    low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size * 2],
                 dtype="float32",
-                default_initializer=fluid.initializer.Constant(0.0))
+                default_initializer=paddle.nn.initializer.Constant(0.0))
             self.bias_1_arr.append(self.add_parameter('b1_%d' % i, bias_1))
             bias_2 = self.create_parameter(
-                attr=fluid.ParamAttr(
-                    initializer=fluid.initializer.UniformInitializer(
-                        low=-self._init_scale, high=self._init_scale)),
+                attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Uniform(
+                    low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size * 1],
                 dtype="float32",
-                default_initializer=fluid.initializer.Constant(0.0))
+                default_initializer=paddle.nn.initializer.Constant(0.0))
             self.bias_2_arr.append(self.add_parameter('b2_%d' % i, bias_2))

     def forward(self, input_embedding, init_hidden=None):
@@ -121,39 +109,38 @@ class SimpleGRURNN(fluid.Layer):
                 bias_1 = self.bias_1_arr[k]
                 bias_2 = self.bias_2_arr[k]

-                nn = fluid.layers.concat([step_input, pre_hidden], 1)
-                gate_input = fluid.layers.matmul(x=nn, y=weight_1)
-                gate_input = fluid.layers.elementwise_add(gate_input, bias_1)
-                u, r = fluid.layers.split(gate_input, num_or_sections=2, dim=-1)
-                hidden_c = fluid.layers.tanh(
-                    fluid.layers.elementwise_add(
-                        fluid.layers.matmul(
-                            x=step_input, y=weight_2) + fluid.layers.matmul(
-                                x=(fluid.layers.sigmoid(r) * pre_hidden),
-                                y=weight_3),
-                        bias_2))
-                hidden_state = fluid.layers.sigmoid(u) * pre_hidden + (
-                    1.0 - fluid.layers.sigmoid(u)) * hidden_c
+                nn = paddle.concat(x=[step_input, pre_hidden], axis=1)
+                gate_input = paddle.matmul(x=nn, y=weight_1)
+                gate_input = paddle.add(x=gate_input, y=bias_1)
+                u, r = paddle.split(x=gate_input, num_or_sections=2, axis=-1)
+                hidden_c = paddle.tanh(
+                    paddle.add(x=paddle.matmul(
+                        x=step_input, y=weight_2) + paddle.matmul(
+                            x=(paddle.nn.functional.sigmoid(r) * pre_hidden),
+                            y=weight_3),
+                               y=bias_2))
+                hidden_state = paddle.nn.functional.sigmoid(u) * pre_hidden + (
+                    1.0 - paddle.nn.functional.sigmoid(u)) * hidden_c
                 hidden_array[k] = hidden_state
                 step_input = hidden_state

             if self._dropout is not None and self._dropout > 0.0:
-                step_input = fluid.layers.dropout(
+                step_input = paddle.fluid.layers.dropout(
                     step_input,
                     dropout_prob=self._dropout,
                     dropout_implementation='upscale_in_train')
             res.append(step_input)
-        real_res = fluid.layers.concat(res, 1)
-        real_res = fluid.layers.reshape(
+        real_res = paddle.concat(x=res, axis=1)
+        real_res = paddle.fluid.layers.reshape(
             real_res, [-1, self._num_steps, self._hidden_size])
-        last_hidden = fluid.layers.concat(hidden_array, 1)
-        last_hidden = fluid.layers.reshape(
+        last_hidden = paddle.concat(x=hidden_array, axis=1)
+        last_hidden = paddle.fluid.layers.reshape(
             last_hidden, shape=[-1, self._num_layers, self._hidden_size])
-        last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
+        last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
         return real_res, last_hidden


-class PtbModel(fluid.Layer):
+class PtbModel(paddle.fluid.Layer):
     def __init__(self,
                  name_scope,
                  hidden_size,
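
For reference, the rewritten forward pass implements a GRU cell. In the code's notation, where u and r are the pre-activation update and reset gates coming out of the single fused matmul and \sigma is the sigmoid:

[u_t, r_t] = W_1 [x_t; h_{t-1}] + b_1
\tilde{h}_t = \tanh\big(W_2 x_t + W_3(\sigma(r_t) \odot h_{t-1}) + b_2\big)
h_t = \sigma(u_t) \odot h_{t-1} + (1 - \sigma(u_t)) \odot \tilde{h}_t
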
@@ -177,26 +164,26 @@ class PtbModel(fluid.Layer):
             num_layers=num_layers,
             init_scale=init_scale,
             dropout=dropout)
-        self.embedding = Embedding(
+        self.embedding = paddle.fluid.dygraph.nn.Embedding(
             #self.full_name(),
             size=[vocab_size, hidden_size],
             dtype='float32',
             is_sparse=False,
-            param_attr=fluid.ParamAttr(
+            param_attr=paddle.ParamAttr(
                 name='embedding_para',
-                initializer=fluid.initializer.UniformInitializer(
+                initializer=paddle.nn.initializer.Uniform(
                     low=-init_scale, high=init_scale)))
         self.softmax_weight = self.create_parameter(
-            attr=fluid.ParamAttr(),
+            attr=paddle.ParamAttr(),
             shape=[self.hidden_size, self.vocab_size],
             dtype="float32",
-            default_initializer=fluid.initializer.UniformInitializer(
+            default_initializer=paddle.nn.initializer.Uniform(
                 low=-self.init_scale, high=self.init_scale))
         self.softmax_bias = self.create_parameter(
-            attr=fluid.ParamAttr(),
+            attr=paddle.ParamAttr(),
             shape=[self.vocab_size],
             dtype="float32",
-            default_initializer=fluid.initializer.UniformInitializer(
+            default_initializer=paddle.nn.initializer.Uniform(
                 low=-self.init_scale, high=self.init_scale))

     def build_once(self, input, label, init_hidden):
@@ -204,30 +191,31 @@ class PtbModel(fluid.Layer):

     def forward(self, input, label, init_hidden):
-        init_h = fluid.layers.reshape(
+        init_h = paddle.fluid.layers.reshape(
             init_hidden, shape=[self.num_layers, -1, self.hidden_size])
         x_emb = self.embedding(input)
-        x_emb = fluid.layers.reshape(
+        x_emb = paddle.fluid.layers.reshape(
             x_emb, shape=[-1, self.num_steps, self.hidden_size])
         if self.dropout is not None and self.dropout > 0.0:
-            x_emb = fluid.layers.dropout(
+            x_emb = paddle.fluid.layers.dropout(
                 x_emb,
                 dropout_prob=self.dropout,
                 dropout_implementation='upscale_in_train')
         rnn_out, last_hidden = self.simple_gru_rnn(x_emb, init_h)
-        projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
-        projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
-        loss = fluid.layers.softmax_with_cross_entropy(
+        projection = paddle.matmul(x=rnn_out, y=self.softmax_weight)
+        projection = paddle.add(x=projection, y=self.softmax_bias)
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False)
-        pre_2d = fluid.layers.reshape(projection, shape=[-1, self.vocab_size])
-        label_2d = fluid.layers.reshape(label, shape=[-1, 1])
-        acc = fluid.layers.accuracy(input=pre_2d, label=label_2d, k=20)
-        loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
-        loss = fluid.layers.reduce_mean(loss, dim=[0])
-        loss = fluid.layers.reduce_sum(loss)
+        pre_2d = paddle.fluid.layers.reshape(
+            projection, shape=[-1, self.vocab_size])
+        label_2d = paddle.fluid.layers.reshape(label, shape=[-1, 1])
+        acc = paddle.metric.accuracy(input=pre_2d, label=label_2d, k=20)
+        loss = paddle.fluid.layers.reshape(loss, shape=[-1, self.num_steps])
+        loss = paddle.reduce_mean(loss, dim=[0])
+        loss = paddle.reduce_sum(loss)
         return loss, last_hidden, acc
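
The loss reduction keeps the usual language-model objective: softmax_with_cross_entropy yields a per-token loss, which is averaged over the batch dimension B and summed over the T = num_steps unrolled positions,

L = \sum_{t=1}^{T} \frac{1}{B} \sum_{b=1}^{B} \mathrm{CE}_{b,t}

so the train_ppl KPI printed later is presumably computed as \mathrm{ppl} = \exp(L / T).
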
@@ -263,13 +251,13 @@ def train_ptb_lm():
         print("model type not support")
         return

-    with fluid.dygraph.guard(core.CUDAPlace(0)):
+    paddle.disable_static(paddle.fluid.core.CUDAPlace(0))
         if args.ce:
             print("ce mode")
             seed = 33
             np.random.seed(seed)
-            fluid.default_startup_program().random_seed = seed
-            fluid.default_main_program().random_seed = seed
+            paddle.static.default_startup_program().random_seed = seed
+            paddle.static.default_main_program().random_seed = seed
             max_epoch = 1
         ptb_model = PtbModel(
             "ptb_model",
@@ -285,7 +273,7 @@ def train_ptb_lm():
             print(args.init_from_pretrain_model)
             raise Warning("The pretrained params do not exist.")
             return
-        fluid.load_dygraph(args.init_from_pretrain_model)
+        paddle.fluid.load_dygraph(args.init_from_pretrain_model)
         print("finish initing model from pretrained params from %s" %
               (args.init_from_pretrain_model))
@@ -309,15 +297,16 @@ def train_ptb_lm():
         lr_arr = [base_learning_rate]
         for i in range(1, max_epoch):
             bd.append(total_batch_size * i)
-            new_lr = base_learning_rate * (lr_decay**
-                                           max(i + 1 - epoch_start_decay, 0.0))
+            new_lr = base_learning_rate * (lr_decay
+                                           **max(i + 1 - epoch_start_decay, 0.0))
             lr_arr.append(new_lr)

-        grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
-        sgd = AdagradOptimizer(
-            parameter_list=ptb_model.parameters(),
-            learning_rate=fluid.layers.piecewise_decay(
-                boundaries=bd, values=lr_arr),
+        grad_clip = paddle.nn.ClipGradByGlobalNorm(max_grad_norm)
+        sgd = paddle.optimizer.Adagrad(
+            parameters=ptb_model.parameters(),
+            learning_rate=base_learning_rate,
+            #learning_rate=paddle.fluid.layers.piecewise_decay(
+            #    boundaries=bd, values=lr_arr),
             grad_clip=grad_clip)

         print("parameters:--------------------------------")
@@ -334,14 +323,17 @@ def train_ptb_lm():
         model.eval()
         train_data_iter = reader.get_data_iter(data, batch_size, num_steps)

-        init_hidden = to_variable(init_hidden_data)
+        init_hidden = paddle.to_tensor(
+            data=init_hidden_data, dtype=None, place=None, stop_gradient=True)
         accum_num_recall = 0.0
         for batch_id, batch in enumerate(train_data_iter):
             x_data, y_data = batch
             x_data = x_data.reshape((-1, num_steps, 1))
             y_data = y_data.reshape((-1, num_steps, 1))
-            x = to_variable(x_data)
-            y = to_variable(y_data)
+            x = paddle.to_tensor(
+                data=x_data, dtype=None, place=None, stop_gradient=True)
+            y = paddle.to_tensor(
+                data=y_data, dtype=None, place=None, stop_gradient=True)
             dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)
             out_loss = dy_loss.numpy()
@@ -371,15 +363,18 @@ def train_ptb_lm():
         train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                num_steps)

-        init_hidden = to_variable(init_hidden_data)
+        init_hidden = paddle.to_tensor(
+            data=init_hidden_data, dtype=None, place=None, stop_gradient=True)
         start_time = time.time()
         for batch_id, batch in enumerate(train_data_iter):
             x_data, y_data = batch
             x_data = x_data.reshape((-1, num_steps, 1))
             y_data = y_data.reshape((-1, num_steps, 1))
-            x = to_variable(x_data)
-            y = to_variable(y_data)
+            x = paddle.to_tensor(
+                data=x_data, dtype=None, place=None, stop_gradient=True)
+            y = paddle.to_tensor(
+                data=y_data, dtype=None, place=None, stop_gradient=True)
             dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)
             out_loss = dy_loss.numpy()
@@ -407,9 +402,10 @@ def train_ptb_lm():
             print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
             save_model_dir = os.path.join(args.save_model_dir,
                                           str(epoch_id), 'params')
-            fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
+            paddle.fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
             print("Saved model to: %s.\n" % save_model_dir)

     eval(ptb_model, test_data)
+    paddle.enable_static()
     #eval(ptb_model, test_data)