Commit 424ba529 authored by: B barrierye

update code to make test acc=0.116

Parent: e2d65fef
@@ -60,10 +60,10 @@ mpirun -np 2 python application.py lm_data
### Training results
```shell
-framework.py : INFO infer results: 0.085723
+framework.py : INFO infer results: 0.116334
```
-That is, the Top1 accuracy on the test set is 8.6%
+That is, the Top1 accuracy on the test set is 11.6%
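For reference, the Top1 number reported above is, per the updated model and trainer code later in this commit, a per-token accuracy in which predicting the padding or unknown symbol never counts as correct. Below is a minimal, self-contained NumPy sketch of that metric with made-up toy values; it is an illustration, not the project's actual evaluation script.

```python
import numpy as np

# Toy illustration of the Top1 metric described above (values are made up).
# A position counts as correct only if the prediction matches the label and
# is neither the <pad> symbol (0) nor the <unk> symbol (1); the denominator
# is the number of real (non-padding) tokens.
pad_symbol, unk_symbol = 0, 1
labels = np.array([5, 7, 1, 0], dtype=np.int64)   # one short padded sequence
preds = np.array([5, 2, 1, 0], dtype=np.int64)    # model predictions for it
correct = (preds == labels) & (preds != unk_symbol) & (preds != pad_symbol)
real_tokens = (labels != pad_symbol).sum()
top1 = correct.sum() / real_tokens
print(top1)  # 0.333... for this toy example
```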
## Adding your own dataset and Trainer
@@ -89,7 +89,7 @@ framework.py : INFO infer results: 0.085723
- Step1 Model initialization
    1. Global parameter initialization: the simulator numbered 0 performs the model initialization; once it has finished, it passes the parameters to the Scheduler through the UpdateGlobalParams() interface (see the sketch after this list);
    2. Personalized parameter initialization
- Step2 Model distribution
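A minimal runnable sketch of the Step1 hand-off described above. Only UpdateGlobalParams() is named in this document; the Scheduler stub, the placeholder parameter dict, and the helper function below are hypothetical stand-ins for illustration, not the actual PaddleFL API.

```python
# Illustrative sketch only: Step1 as seen from the simulator side.
# UpdateGlobalParams() comes from the description above; everything else
# (SchedulerStub, the placeholder parameter dict) is hypothetical.
class SchedulerStub:
    def UpdateGlobalParams(self, params):
        # the real Scheduler would receive the parameters (e.g. over RPC)
        # and keep them as the current global model
        self.global_params = params


def step1_initialize(simulator_id, scheduler):
    if simulator_id == 0:
        # simulator 0 builds and initializes the global model, then uploads it
        global_params = {"embedding_para": [0.0] * 8}  # placeholder tensors
        scheduler.UpdateGlobalParams(global_params)


scheduler = SchedulerStub()
step1_initialize(simulator_id=0, scheduler=scheduler)
print(list(scheduler.global_params.keys()))  # ['embedding_para']
```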
......
@@ -36,17 +36,22 @@ simulator = SimulationFramework(role_maker)
language_model_trainer = LanguageModelTrainer()
language_model_trainer.set_trainer_configs({
-    "epoch": 3,
+    "epoch": 1,
    "max_steps_in_epoch": -1,
-    "lr": 0.1,
+    "lr": 1.0,
    "batch_size": 5,
+    "max_grad_norm": 5,
+    "n_hidden": 256,
+    "num_layers": 2,
+    "init_scale": 0.1,
+    "dropout_prob": 0.0,
})
sampler = UniformSampler()
-sampler.set_sample_num(30)
+sampler.set_sample_num(10)
sampler.set_min_ins_num(1)
test_sampler = Test1percentSampler()
-fed_avg_optimizer = FedAvgOptimizer(learning_rate=2.0)
+fed_avg_optimizer = FedAvgOptimizer(learning_rate=1.85)
simulator.set_trainer(language_model_trainer)
simulator.set_sampler(sampler)
@@ -68,5 +73,8 @@ elif simulator.is_simulator():
    print("dates: {}".format(dates))
    time.sleep(10)
-    simulator.run_simulation(
-        base_path, dates, sim_num_everyday=100, do_test=True, test_skip_day=1)
+    simulator.run_simulation(base_path,
+                             dates,
+                             sim_num_everyday=100,
+                             do_test=True,
+                             test_skip_day=1)
@@ -22,8 +22,6 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid as fluid
-from paddle.fluid.layers.control_flow import StaticRNN as PaddingRNN
-import numpy as np
from paddle.fluid import ParamAttr
from paddle.fluid.contrib.layers import basic_lstm
@@ -31,19 +29,19 @@ from paddle.fluid.contrib.layers import basic_lstm
class LanguageModel(ModelBase):
    def __init__(self):
        # model args
-        self.hidden_size_ = 200
+        self.seq_len_ = 10  # fixed
-        self.vocab_size_ = 10000
+        self.n_hidden_ = 256
        self.num_layers_ = 2
-        self.num_steps_ = 10  # fix
-        self.init_scale_ = 0.1
-        self.dropout_ = 0.0
-        self.rnn_model_ = 'basic_lstm'
        self.pad_symbol_ = 0
        self.unk_symbol_ = 1
+        self.vocab_size_ = 10000
+        self.init_scale_ = 0.1
+        self.max_grad_norm_ = 5
+        self.dropout_prob_ = 0.0
        # results
        self.correct_ = None
-        self.prediction_ = None
+        self.pred_ = None
        self.loss_ = None
        # private vars
@@ -53,6 +51,13 @@ class LanguageModel(ModelBase):
        self.input_name_list_ = None
        self.target_var_names_ = []

+    def update_params(self, config):
+        self.n_hidden_ = config.get("n_hidden", 256)
+        self.num_layers_ = config.get("num_layers", 2)
+        self.init_scale_ = config.get("init_scale", 0.1)
+        self.max_grad_norm_ = config.get("max_grad_norm", 5)
+        self.dropout_prob_ = config.get("dropout_prob", 0.0)
+
    def get_model_input_names(self):
        return self.input_name_list_
@@ -63,381 +68,157 @@ class LanguageModel(ModelBase):
        return self.loss_.name

    def get_model_metrics(self):
-        return {"correct": self.correct_.name}
+        metrics = {
+            "init_hidden": self.last_hidden_.name,
+            "init_cell": self.last_cell_.name,
+            "correct": self.correct_.name
+        }
+        return metrics

    def get_target_names(self):
        return self.target_var_names_
    def build_model(self, model_configs):
-        hidden_size = self.hidden_size_
-        init_scale = self.init_scale_
-        dropout = self.dropout_
-        num_layers = self.num_layers_
-        num_steps = self.num_steps_
-        pad_symbol = self.pad_symbol_
-        unk_symbol = self.unk_symbol_
-        vocab_size = self.vocab_size_
-        rnn_model = self.rnn_model_
-        x = fluid.data(name="x", shape=[None, num_steps], dtype='int64')
-        y = fluid.data(name="y", shape=[None, num_steps], dtype='int64')
-        x = layers.reshape(x, shape=[-1, num_steps, 1])
-        y = layers.reshape(y, shape=[-1, 1])
-        self.input_name_list_ = ['x', 'y']
-
-        init_hidden = layers.fill_constant_batch_size_like(
-            input=x,
-            shape=[-1, num_layers, hidden_size],
-            value=0,
-            dtype="float32")
-        init_cell = layers.fill_constant_batch_size_like(
-            input=x,
-            shape=[-1, num_layers, hidden_size],
-            value=0,
-            dtype="float32")
+        self.update_params(model_configs)
+        features = fluid.layers.data(name="features",
+                                     shape=[None, self.seq_len_],
+                                     dtype='int64')
+        labels = fluid.layers.data(name="labels",
+                                   shape=[None, self.seq_len_],
+                                   dtype='int64')
+        sequence_length_ph = fluid.layers.data(name="seq_len_ph",
+                                               shape=[None],
+                                               dtype='int64')
+        sequence_mask_ph = fluid.layers.data(name="seq_mask_ph",
+                                             shape=[None],
+                                             dtype='float32')
+        init_hidden = fluid.layers.data(
+            name="init_hidden",
+            shape=[None, self.num_layers_, self.n_hidden_],
+            dtype='float32')
+        init_cell = fluid.layers.data(
+            name="init_cell",
+            shape=[None, self.num_layers_, self.n_hidden_],
+            dtype='float32')

        init_hidden = layers.transpose(init_hidden, perm=[1, 0, 2])
        init_cell = layers.transpose(init_cell, perm=[1, 0, 2])
        init_hidden_reshape = layers.reshape(
-            init_hidden, shape=[num_layers, -1, hidden_size])
+            init_hidden, shape=[self.num_layers_, -1, self.n_hidden_])
        init_cell_reshape = layers.reshape(
-            init_cell, shape=[num_layers, -1, hidden_size])
+            init_cell, shape=[self.num_layers_, -1, self.n_hidden_])

-        x_emb = layers.embedding(
-            input=x,
-            size=[vocab_size, hidden_size],
+        features = layers.reshape(features, shape=[-1, self.seq_len_, 1])
+
+        # word embedding
+        inputs = layers.embedding(
+            input=features,
+            size=[self.vocab_size_, self.n_hidden_],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(
                name='embedding_para',
                initializer=fluid.initializer.UniformInitializer(
-                    low=-init_scale, high=init_scale)))
+                    low=-self.init_scale_, high=self.init_scale_)))

-        x_emb = layers.reshape(
-            x_emb, shape=[-1, num_steps, hidden_size], inplace=True)
-        if dropout != None and dropout > 0.0:
-            x_emb = layers.dropout(
-                x_emb,
-                dropout_prob=dropout,
-                dropout_implementation='upscale_in_train')
-
-        if rnn_model == "padding":
-            rnn_out, last_hidden, last_cell = self._padding_rnn(
-                x_emb,
-                len=num_steps,
-                init_hidden=init_hidden_reshape,
-                init_cell=init_cell_reshape)
-        elif rnn_model == "static":
-            rnn_out, last_hidden, last_cell = self._encoder_static(
-                x_emb,
-                len=num_steps,
-                init_hidden=init_hidden_reshape,
-                init_cell=init_cell_reshape)
-        elif rnn_model == "cudnn":
-            x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
-            rnn_out, last_hidden, last_cell = layers.lstm(
-                x_emb,
-                init_hidden_reshape,
-                init_cell_reshape,
-                num_steps,
-                hidden_size,
-                num_layers,
-                is_bidirec=False,
-                default_initializer=fluid.initializer.UniformInitializer(
-                    low=-init_scale, high=init_scale))
-            rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2])
-        elif rnn_model == "basic_lstm":
-            rnn_out, last_hidden, last_cell = basic_lstm(
-                x_emb,
-                init_hidden,
-                init_cell,
-                hidden_size,
-                num_layers=num_layers,
-                batch_first=True,
-                dropout_prob=dropout,
-                param_attr=ParamAttr(
-                    initializer=fluid.initializer.UniformInitializer(
-                        low=-init_scale, high=init_scale)),
-                bias_attr=ParamAttr(
-                    initializer=fluid.initializer.Constant(0.0)),
-                forget_bias=0.0)
-        else:
-            raise Exception("type not support")
-        rnn_out = layers.reshape(
-            rnn_out, shape=[-1, num_steps, hidden_size], inplace=True)
+        # LSTM
+        output, last_hidden, last_cell = self._build_rnn_graph(
+            inputs, init_hidden, init_cell, sequence_length_ph)
+
+        output = layers.reshape(output,
+                                shape=[-1, self.seq_len_, self.n_hidden_],
+                                inplace=True)
+        self.last_hidden_ = layers.reshape(
+            last_hidden, [-1, self.num_layers_, self.n_hidden_])
+        self.last_cell_ = layers.reshape(
+            last_cell, [-1, self.num_layers_, self.n_hidden_])

-        softmax_weight = layers.create_parameter(
-            [hidden_size, vocab_size],
+        # softmax
+        softmax_w = layers.create_parameter(
+            [self.n_hidden_, self.vocab_size_],
            dtype="float32",
-            name="softmax_weight",
+            name="softmax_w",
            default_initializer=fluid.initializer.UniformInitializer(
-                low=-init_scale, high=init_scale))
+                low=-self.init_scale_, high=self.init_scale_))
-        softmax_bias = layers.create_parameter(
-            [vocab_size],
+        softmax_b = layers.create_parameter(
+            [self.vocab_size_],
            dtype="float32",
-            name='softmax_bias',
+            name='softmax_b',
            default_initializer=fluid.initializer.UniformInitializer(
-                low=-init_scale, high=init_scale))
+                low=-self.init_scale_, high=self.init_scale_))

-        projection = layers.matmul(rnn_out, softmax_weight)
-        projection = layers.elementwise_add(projection, softmax_bias)
-        projection = layers.reshape(
-            projection, shape=[-1, vocab_size], inplace=True)
+        logits = layers.matmul(output, softmax_w)
+        logits = layers.elementwise_add(logits, softmax_b)
+        logits = layers.reshape(logits,
+                                shape=[-1, self.vocab_size_],
+                                inplace=True)

        # correct predictions
-        labels_reshaped = fluid.layers.reshape(y, [-1])
-        pred = fluid.layers.cast(
-            fluid.layers.argmax(projection, 1), dtype="int64")
-        correct_pred = fluid.layers.cast(
-            fluid.layers.equal(pred, labels_reshaped), dtype="int64")
-        self.prediction_ = pred
-        self.target_var_names_.append(pred)
+        labels_reshaped = layers.reshape(labels, [-1])
+        pred = layers.cast(layers.argmax(logits, 1), dtype="int64")
+        correct_pred = layers.cast(layers.equal(pred, labels_reshaped),
+                                   dtype="int64")
+        self.pred_ = pred

        # predicting unknown is always considered wrong
-        unk_tensor = fluid.layers.fill_constant(
-            fluid.layers.shape(labels_reshaped),
-            value=unk_symbol,
-            dtype='int64')
-        pred_unk = fluid.layers.cast(
-            fluid.layers.equal(pred, unk_tensor), dtype="int64")
-        correct_unk = fluid.layers.elementwise_mul(pred_unk, correct_pred)
+        # only in paddle 1.8
+        unk_tensor = layers.fill_constant(layers.shape(labels_reshaped),
+                                          value=self.unk_symbol_,
+                                          dtype='int64')
+        pred_unk = layers.cast(layers.equal(pred, unk_tensor), dtype="int64")
+        correct_unk = layers.elementwise_mul(pred_unk, correct_pred)

        # predicting padding is always considered wrong
-        pad_tensor = fluid.layers.fill_constant(
-            fluid.layers.shape(labels_reshaped), value=0, dtype='int64')
-        pred_pad = fluid.layers.cast(
-            fluid.layers.equal(pred, pad_tensor), dtype="int64")
-        correct_pad = fluid.layers.elementwise_mul(pred_pad, correct_pred)
+        pad_tensor = layers.fill_constant(layers.shape(labels_reshaped),
+                                          value=self.pad_symbol_,
+                                          dtype='int64')
+        pred_pad = layers.cast(layers.equal(pred, pad_tensor), dtype="int64")
+        correct_pad = layers.elementwise_mul(pred_pad, correct_pred)

-        # acc
-        correct_count = fluid.layers.reduce_sum(correct_pred) \
-            - fluid.layers.reduce_sum(correct_unk) \
-            - fluid.layers.reduce_sum(correct_pad)
-        self.correct_ = correct_count
-        self.target_var_names_.append(correct_count)
-
-        loss = layers.softmax_with_cross_entropy(
-            logits=projection, label=y, soft_label=False)
-        loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True)
-        loss = layers.reduce_mean(loss, dim=[0])
-        loss = layers.reduce_sum(loss)
+        # Reshape logits to be a 3-D tensor for sequence loss
+        logits = layers.reshape(logits, [-1, self.seq_len_, self.vocab_size_])
+        labels = layers.reshape(labels, [-1, self.seq_len_, 1])
+        loss = layers.softmax_with_cross_entropy(logits=logits,
+                                                 label=labels,
+                                                 soft_label=False,
+                                                 return_softmax=False)
+        sequence_mask = layers.reshape(sequence_mask_ph,
+                                       [-1, self.seq_len_, 1])
+        loss = layers.reduce_mean(layers.elementwise_mul(loss, sequence_mask))
+
+        eval_metric_ops = fluid.layers.reduce_sum(correct_pred) \
+            - fluid.layers.reduce_sum(correct_unk) \
+            - fluid.layers.reduce_sum(correct_pad)

        self.loss_ = loss
-        self.target_var_names_.append(loss)
-
-        loss.persistable = True
-
-        # This will feed last_hidden, last_cell to init_hidden, init_cell, which
-        # can be used directly in next batch. This can avoid the fetching of
-        # last_hidden and last_cell and feeding of init_hidden and init_cell in
-        # each training step.
-        #last_hidden = layers.transpose(last_hidden, perm=[1, 0, 2])
-        #last_cell = layers.transpose(last_cell, perm=[1, 0, 2])
-        #self.input_name_list_ = ['x', 'y', 'init_hidden', 'init_cell']
+        self.correct_ = eval_metric_ops
+        self.input_name_list_ = [
+            'features', 'labels', 'seq_len_ph', 'seq_mask_ph', 'init_hidden',
+            'init_cell'
+        ]
+        self.target_var_names_ = [
+            self.loss_, self.last_hidden_, self.last_cell_, self.correct_
+        ]

        self.program_ = fluid.default_main_program()
        self.startup_program_ = fluid.default_startup_program()
+    def _build_rnn_graph(self, inputs, init_hidden, init_cell,
+                         sequence_length_ph):
+        rnn_out, last_hidden, last_cell = basic_lstm(
+            input=inputs,
+            init_hidden=init_hidden,
+            init_cell=init_cell,
+            hidden_size=self.n_hidden_,
+            num_layers=self.num_layers_,
+            batch_first=True,
+            dropout_prob=self.dropout_prob_,
+            sequence_length=sequence_length_ph,
+            param_attr=ParamAttr(
+                initializer=fluid.initializer.UniformInitializer(
+                    low=-self.init_scale_, high=self.init_scale_)),
+            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
+            forget_bias=0.0)
+        return rnn_out, last_hidden, last_cell

-    def _padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
-        weight_1_arr = []
-        weight_2_arr = []
-        bias_arr = []
-        hidden_array = []
-        cell_array = []
-        mask_array = []
-        hidden_size = self.hidden_size_
-        init_scale = self.init_scale_
-        dropout = slef.dropout_
-        num_layers = self.num_layers_
-        num_steps = self._num_steps_
-        for i in range(num_layers):
-            weight_1 = layers.create_parameter(
-                [hidden_size * 2, hidden_size * 4],
-                dtype="float32",
-                name="fc_weight1_" + str(i),
default_initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale))
weight_1_arr.append(weight_1)
bias_1 = layers.create_parameter(
[hidden_size * 4],
dtype="float32",
name="fc_bias1_" + str(i),
default_initializer=fluid.initializer.Constant(0.0))
bias_arr.append(bias_1)
pre_hidden = layers.slice(
init_hidden, axes=[0], starts=[i], ends=[i + 1])
pre_cell = layers.slice(
init_cell, axes=[0], starts=[i], ends=[i + 1])
pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size])
pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size])
hidden_array.append(pre_hidden)
cell_array.append(pre_cell)
input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2])
rnn = PaddingRNN()
with rnn.step():
input = rnn.step_input(input_embedding)
for k in range(num_layers):
pre_hidden = rnn.memory(init=hidden_array[k])
pre_cell = rnn.memory(init=cell_array[k])
weight_1 = weight_1_arr[k]
bias = bias_arr[k]
nn = layers.concat([input, pre_hidden], 1)
gate_input = layers.matmul(x=nn, y=weight_1)
gate_input = layers.elementwise_add(gate_input, bias)
i = layers.slice(
gate_input, axes=[1], starts=[0], ends=[hidden_size])
j = layers.slice(
gate_input,
axes=[1],
starts=[hidden_size],
ends=[hidden_size * 2])
f = layers.slice(
gate_input,
axes=[1],
starts=[hidden_size * 2],
ends=[hidden_size * 3])
o = layers.slice(
gate_input,
axes=[1],
starts=[hidden_size * 3],
ends=[hidden_size * 4])
c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
i) * layers.tanh(j)
m = layers.tanh(c) * layers.sigmoid(o)
rnn.update_memory(pre_hidden, m)
rnn.update_memory(pre_cell, c)
rnn.step_output(m)
rnn.step_output(c)
input = m
if dropout != None and dropout > 0.0:
input = layers.dropout(
input,
dropout_prob=dropout,
dropout_implementation='upscale_in_train')
rnn.step_output(input)
rnnout = rnn()
last_hidden_array = []
last_cell_array = []
real_res = rnnout[-1]
for i in range(num_layers):
m = rnnout[i * 2]
c = rnnout[i * 2 + 1]
m.stop_gradient = True
c.stop_gradient = True
last_h = layers.slice(
m, axes=[0], starts=[num_steps - 1], ends=[num_steps])
last_hidden_array.append(last_h)
last_c = layers.slice(
c, axes=[0], starts=[num_steps - 1], ends=[num_steps])
last_cell_array.append(last_c)
real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
last_hidden = layers.concat(last_hidden_array, 0)
last_cell = layers.concat(last_cell_array, 0)
return real_res, last_hidden, last_cell
def _encoder_static(input_embedding,
len=3,
init_hidden=None,
init_cell=None):
weight_1_arr = []
weight_2_arr = []
bias_arr = []
hidden_array = []
cell_array = []
mask_array = []
hidden_size = self.hidden_size_
init_scale = self.init_scale_
dropout = slef.dropout_
num_layers = self.num_layers_
for i in range(num_layers):
weight_1 = layers.create_parameter(
[hidden_size * 2, hidden_size * 4],
dtype="float32",
name="fc_weight1_" + str(i),
default_initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale))
weight_1_arr.append(weight_1)
bias_1 = layers.create_parameter(
[hidden_size * 4],
dtype="float32",
name="fc_bias1_" + str(i),
default_initializer=fluid.initializer.Constant(0.0))
bias_arr.append(bias_1)
pre_hidden = layers.slice(
init_hidden, axes=[0], starts=[i], ends=[i + 1])
pre_cell = layers.slice(
init_cell, axes=[0], starts=[i], ends=[i + 1])
pre_hidden = layers.reshape(
pre_hidden, shape=[-1, hidden_size], inplace=True)
pre_cell = layers.reshape(
pre_cell, shape=[-1, hidden_size], inplace=True)
hidden_array.append(pre_hidden)
cell_array.append(pre_cell)
res = []
sliced_inputs = layers.split(
input_embedding, num_or_sections=len, dim=1)
for index in range(len):
input = sliced_inputs[index]
input = layers.reshape(
input, shape=[-1, hidden_size], inplace=True)
for k in range(num_layers):
pre_hidden = hidden_array[k]
pre_cell = cell_array[k]
weight_1 = weight_1_arr[k]
bias = bias_arr[k]
nn = layers.concat([input, pre_hidden], 1)
gate_input = layers.matmul(x=nn, y=weight_1)
gate_input = layers.elementwise_add(gate_input, bias)
i, j, f, o = layers.split(
gate_input, num_or_sections=4, dim=-1)
c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
i) * layers.tanh(j)
m = layers.tanh(c) * layers.sigmoid(o)
hidden_array[k] = m
cell_array[k] = c
input = m
if dropout != None and dropout > 0.0:
input = layers.dropout(
input,
dropout_prob=dropout,
dropout_implementation='upscale_in_train')
res.append(input)
last_hidden = layers.concat(hidden_array, 1)
last_hidden = layers.reshape(
last_hidden, shape=[-1, num_layers, hidden_size], inplace=True)
last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = layers.concat(cell_array, 1)
last_cell = layers.reshape(
last_cell, shape=[-1, num_layers, hidden_size])
last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])
real_res = layers.concat(res, 0)
real_res = layers.reshape(
real_res, shape=[len, -1, hidden_size], inplace=True)
real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
@@ -108,7 +108,7 @@ def train_reader(lines):
        input_data, input_length = process_x(data_x, VOCAB)
        target_data = process_y(data_y, VOCAB)
-        yield [input_data] + [target_data]
+        yield [input_data] + [target_data] + [input_length] + [data_mask]
    return local_iter
......
@@ -34,10 +34,10 @@ def train_one_user(arg_dict, trainer_config):
    max_training_steps = trainer_config["max_training_steps"]
    batch_size = trainer_config["batch_size"]
    # logging.info("training one user...")
-    main_program = fluid.Program.parse_from_string(trainer_config[
-        "main_program_desc"])
-    startup_program = fluid.Program.parse_from_string(trainer_config[
-        "startup_program_desc"])
+    main_program = fluid.Program.parse_from_string(
+        trainer_config["main_program_desc"])
+    startup_program = fluid.Program.parse_from_string(
+        trainer_config["startup_program_desc"])
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
@@ -46,10 +46,9 @@ def train_one_user(arg_dict, trainer_config):
        exit()
    exe.run(startup_program)
-    feeder = fluid.DataFeeder(
-        feed_list=trainer_config["input_names"],
-        place=place,
-        program=main_program)
+    feeder = fluid.DataFeeder(feed_list=trainer_config["input_names"],
+                              place=place,
+                              program=main_program)
    data_server_endpoints = arg_dict["data_endpoints"]
    # create data clients
    data_client = DataClient()
@@ -76,36 +75,43 @@ def train_one_user(arg_dict, trainer_config):
    epoch = trainer_config["epoch"]
    max_steps_in_epoch = trainer_config.get("max_steps_in_epoch", -1)
    metrics = trainer_config["metrics"]
-    metric_keys = metrics.keys()
-    fetch_list = [main_program.global_block().var(trainer_config["loss_name"])]
-    for key in metric_keys:
-        fetch_list.append(main_program.global_block().var(metrics[key]))
-    seq_len = 10
+    fetch_list = []
+    for var in trainer_config["target_names"]:
+        fetch_list.append(var)

    for ei in range(epoch):
+        fetch_res_list = []
        trained_sample_num = 0
        step = 0
-        fetch_res_list = []
-        total_loss = 0.0
-        total_correct = 0
+        num_layers = trainer_config["num_layers"]
+        hidden_size = trainer_config["n_hidden"]
+        tot_loss, tot_correct = 0, 0
+        tot_samples = 0
+        init_hidden, init_cell = generate_init_data(batch_size, num_layers,
+                                                    hidden_size)
        for data in train_reader():
+            feed_data, input_lengths = prepare_input(batch_size, data,
+                                                     init_hidden, init_cell)
            fetch_res = exe.run(main_program,
-                                feed=feeder.feed(data),
+                                feed=feeder.feed(feed_data),
                                fetch_list=fetch_list)
+            loss, last_hidden, last_cell, correct = fetch_res
+            init_hidden = np.array(last_hidden)
+            init_cell = np.array(last_cell)
+            tot_loss += np.array(loss)
+            tot_correct += np.array(correct)
+            tot_samples += np.sum(input_lengths)
            step += 1
            trained_sample_num += len(data)
-            fetch_res_list.append([x[0] for x in fetch_res])
+            fetch_res_list.append([np.array(loss), np.array(correct)])
            if max_steps_in_epoch != -1 and step >= max_steps_in_epoch:
                break

        if show_metric and trained_sample_num > 0:
-            loss = sum([x[0] for x in fetch_res_list]) / trained_sample_num
-            print("loss: {}, ppl: {}".format(loss, np.exp(loss)))
-            for i, key in enumerate(metric_keys):
-                if key == "correct":
-                    value = float(sum([x[i + 1] for x in fetch_res_list
-                                       ])) / trained_sample_num
-                    print("correct: {}".format(value / seq_len))
+            loss = tot_loss / step
+            acc = float(tot_correct) / tot_samples
+            print("loss: {}, acc: {}".format(loss, acc))

    local_updated_param_dict = {}
    # update user param
@@ -142,10 +148,10 @@ def infer_one_user(arg_dict, trainer_config):
    # run startup program, set params
    uid = arg_dict["uid"]
    batch_size = trainer_config["batch_size"]
-    startup_program = fluid.Program.parse_from_string(trainer_config[
-        "startup_program_desc"])
-    infer_program = fluid.Program.parse_from_string(trainer_config[
-        "infer_program_desc"])
+    startup_program = fluid.Program.parse_from_string(
+        trainer_config["startup_program_desc"])
+    infer_program = fluid.Program.parse_from_string(
+        trainer_config["infer_program_desc"])
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
@@ -169,7 +175,6 @@ def infer_one_user(arg_dict, trainer_config):
        arg_dict["global_params"], scope)
    # reader
    date = arg_dict["date"]
    global_param_dict = arg_dict["global_params"]
    user_data = data_client.get_data_by_uid(uid, date)
@@ -179,36 +184,60 @@ def infer_one_user(arg_dict, trainer_config):
    # run infer program
    os.mkdir(arg_dict["infer_result_dir"])
    #pred_file = open(arg_dict["infer_result_dir"] + '/' + "pred_file", "w")
-    feeder = fluid.DataFeeder(
-        feed_list=trainer_config["input_names"],
-        place=place,
-        program=infer_program)
+    feeder = fluid.DataFeeder(feed_list=trainer_config["input_names"],
+                              place=place,
+                              program=infer_program)
    fetch_list = trainer_config["target_names"]
    #logging.info("fetch_list: {}".format(fetch_list))
    fetch_res = []
    sample_count = 0
-    total_loss = 0.0
-    total_correct = 0
-    iters = 0
-    steps = 0
-    seq_len = 10
+    num_layers = trainer_config["num_layers"]
+    hidden_size = trainer_config["n_hidden"]
+    tot_correct, tot_loss = 0, 0
+    tot_samples, tot_batches = 0, 0
+    init_hidden, init_cell = generate_init_data(batch_size, num_layers,
+                                                hidden_size)
    for data in infer_reader():
-        # feed_data = [x["features"] + [x["label"]] for x in data]
-        # prediction, acc_val= exe.run(infer_program,
-        pred, correct_count, loss = exe.run(infer_program,
-                                            feed=feeder.feed(data),
-                                            fetch_list=fetch_list)
-        total_loss += loss
-        total_correct += correct_count
-        steps += 1
-        sample_count += len(data)
-
-    correct = float(total_correct) / (seq_len * sample_count)
-    # logging.info("correct: {}".format(correct))
+        feed_data, input_lengths = prepare_input(batch_size, data, init_hidden,
+                                                 init_cell)
+        fetch_res = exe.run(infer_program,
+                            feed=feeder.feed(feed_data),
+                            fetch_list=fetch_list)
+        loss, last_hidden, last_cell, correct = fetch_res
+
+        cost_eval = np.array(loss)
+        init_hidden = np.array(last_hidden)
+        init_cell = np.array(last_cell)
+        correct_val = np.array(correct)
+        tot_loss += cost_eval
+        tot_correct += correct_val
+        tot_samples += np.sum(input_lengths)
+        tot_batches += 1
+
+    loss = tot_loss / tot_batches
+    acc = float(tot_correct) / tot_samples
+    logging.info("infer acc: {}".format(acc))
    with open(arg_dict["infer_result_dir"] + "/res", "w") as f:
-        f.write("%d\t%f\n" % (1, correct))
+        f.write("%d\t%f\n" % (1, acc))


+def prepare_input(batch_size, data, init_hidden, init_cell):
+    init_hidden = np.split(init_hidden, batch_size)
+    init_cell = np.split(init_cell, batch_size)
+    data = [[features] + [labels] + [seq_len_ph] + [seq_mask_ph] +
+            [init_hidden[i]] + [init_cell[i]]
+            for i, (features, labels, seq_len_ph, seq_mask_ph) in enumerate(data)]
+    input_lengths = [x[2] for x in data]
+    return data, input_lengths
+
+
+def generate_init_data(batch_size, num_layers, hidden_size):
+    init_hidden = np.zeros((batch_size, num_layers, hidden_size),
+                           dtype='float32')
+    init_cell = np.zeros((batch_size, num_layers, hidden_size),
+                         dtype='float32')
+    return init_hidden, init_cell
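To make the data flow of the two helpers above concrete, here is a small standalone check, assuming generate_init_data and prepare_input are importable as defined in this commit; the batch contents, lengths, and sizes below are illustrative only.

```python
import numpy as np

# Standalone illustration of generate_init_data/prepare_input above;
# the fake batch and the sizes are made up for the example.
batch_size, num_layers, hidden_size, seq_len = 2, 2, 256, 10

init_hidden, init_cell = generate_init_data(batch_size, num_layers, hidden_size)
# one fake batch: (features, labels, seq_len, seq_mask) per sample
batch = [(np.zeros(seq_len, dtype='int64'),
          np.zeros(seq_len, dtype='int64'),
          3,
          np.array([1.0] * 3 + [0.0] * 7, dtype='float32'))
         for _ in range(batch_size)]

feed_data, input_lengths = prepare_input(batch_size, batch, init_hidden, init_cell)
# each sample's feed now also carries its slice of the initial LSTM state
assert len(feed_data) == batch_size
assert input_lengths == [3, 3]
```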
def save_and_upload(arg_dict, trainer_config, dfs_upload_path):
@@ -219,7 +248,6 @@ def save_and_upload(arg_dict, trainer_config, dfs_upload_path):
def evaluate_a_group(group):
    group_list = []
    for label, pred, _ in group:
-        # print("%s\t%s\n" % (label, pred))
        group_list.append((int(label), float(pred)))
    random.shuffle(group_list)
    labels = [x[0] for x in group_list]
@@ -236,7 +264,6 @@ class LanguageModelTrainer(TrainerBase):
    """
    LanguageModelTrainer only support training with PaddlePaddle
    """
    def __init__(self):
        super(LanguageModelTrainer, self).__init__()
        self.main_program_ = fluid.Program()
@@ -270,10 +297,13 @@ class LanguageModelTrainer(TrainerBase):
        """
        with fluid.program_guard(self.main_program_, self.startup_program_):
            self.input_model_ = LanguageModel()
-            model_configs = {}
+            model_configs = self.trainer_config
            self.input_model_.build_model(model_configs)
            optimizer = fluid.optimizer.SGD(
-                learning_rate=self.trainer_config["lr"])
+                learning_rate=self.trainer_config["lr"],
+                grad_clip=fluid.clip.GradientClipByGlobalNorm(
+                    clip_norm=self.trainer_config["max_grad_norm"]))
            optimizer.minimize(self.input_model_.get_model_loss())
        self.main_program_desc_ = self.main_program_.desc.serialize_to_string()
@@ -283,13 +313,16 @@ class LanguageModelTrainer(TrainerBase):
            self.input_model_.get_model_loss_name())
        self.update_trainer_configs(
            "input_names",
-            self.input_model_.get_model_input_names(), )
+            self.input_model_.get_model_input_names(),
+        )
        self.update_trainer_configs(
            "target_names",
-            self.input_model_.get_target_names(), )
+            self.input_model_.get_target_names(),
+        )
        self.update_trainer_configs(
            "metrics",
-            self.input_model_.get_model_metrics(), )
+            self.input_model_.get_model_metrics(),
+        )
        self.update_trainer_configs("show_metric", True)
        self.update_trainer_configs("max_training_steps", "inf")
        self.update_trainer_configs("shuffle", False)
......