提交 0f3eeda1 编写于 作者: Q Qiao Longfei

update train and test

上级 cc56cb2e
...@@ -2,17 +2,19 @@ import os ...@@ -2,17 +2,19 @@ import os
import gzip import gzip
import argparse import argparse
import itertools import itertools
import numpy as np
import paddle.v2 as paddle import paddle
import paddle.fluid as fluid
from network_conf import DeepFM from network_conf import DeepFM
import reader import reader
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description="PaddlePaddle DeepFM example") parser = argparse.ArgumentParser(description="PaddlePaddle DeepFM example")
parser.add_argument( parser.add_argument(
'--model_gz_path', '--model_path',
type=str, type=str,
required=True, required=True,
help="The path of model parameters gz file") help="The path of model parameters gz file")
...@@ -21,11 +23,6 @@ def parse_args(): ...@@ -21,11 +23,6 @@ def parse_args():
type=str, type=str,
required=True, required=True,
help="The path of the dataset to infer") help="The path of the dataset to infer")
parser.add_argument(
'--prediction_output_path',
type=str,
required=True,
help="The path to output the prediction")
parser.add_argument( parser.add_argument(
'--factor_size', '--factor_size',
type=int, type=int,
...@@ -38,25 +35,43 @@ def parse_args(): ...@@ -38,25 +35,43 @@ def parse_args():
def infer():
    """Evaluate a saved inference model on a dataset and print loss and AUC.

    Reads ``--model_path``, ``--data_path`` and ``--factor_size`` from the
    command line (via ``parse_args``), loads the inference model stored at
    ``model_path`` into a private scope, and runs it on CPU over batches of
    1000 samples.  The fetched loss and AUC values are printed every 100
    batches.  Returns nothing.
    """
    args = parse_args()

    place = fluid.CPUPlace()
    # A dedicated scope keeps the loaded variables out of the global scope.
    inference_scope = fluid.core.Scope()

    dataset = reader.Dataset()
    # NOTE(review): this reads the data through ``dataset.train`` (not
    # ``dataset.infer``) -- presumably because labels are needed to compute
    # loss/AUC; confirm against ``reader``.
    test_reader = paddle.batch(dataset.train(args.data_path), batch_size=1000)

    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    with fluid.framework.program_guard(test_program, startup_program):
        # The network is rebuilt only to obtain the input variables for the
        # feeder; the program that is executed below is the loaded one.
        _, data_list, _, _ = DeepFM(args.factor_size)

        exe = fluid.Executor(place)
        # The startup program is deliberately not run: variables come from
        # the model loaded below.

        feeder = fluid.DataFeeder(feed_list=data_list, place=place)

        with fluid.scope_guard(inference_scope):
            [inference_program, _, fetch_targets] = \
                fluid.io.load_inference_model(args.model_path, exe)
            print(fetch_targets)

            def set_zero(var_name):
                """Overwrite the tensor of ``var_name`` with int64 zeros."""
                param = inference_scope.var(var_name).get_tensor()
                param_array = np.zeros(param._get_dims()).astype("int64")
                param.set(param_array, place)

            # Presumably the accumulated AUC state variables of the saved
            # model, reset so that AUC starts from scratch -- TODO confirm
            # that these generated names match the saved program.
            auc_states_names = ['_generated_var_2', '_generated_var_3']
            for name in auc_states_names:
                set_zero(name)

            for batch_id, data in enumerate(test_reader()):
                loss_val, auc_val = exe.run(inference_program,
                                            feed=feeder.feed(data),
                                            fetch_list=fetch_targets)
                if batch_id % 100 == 0:
                    print("loss: " + str(loss_val) + " auc_val:" +
                          str(auc_val))
if __name__ == '__main__': if __name__ == '__main__':
......
import paddle.fluid as fluid import paddle.fluid as fluid
import math
dense_feature_dim = 13 dense_feature_dim = 13
sparse_feature_dim = 117568 sparse_feature_dim = 117568
...@@ -17,15 +18,19 @@ def DeepFM(factor_size, infer=False): ...@@ -17,15 +18,19 @@ def DeepFM(factor_size, infer=False):
return fluid.layers.embedding( return fluid.layers.embedding(
input=input, input=input,
size=[sparse_feature_dim, factor_size], size=[sparse_feature_dim, factor_size],
param_attr=fluid.ParamAttr(name="SparseFeatFactors")) param_attr=fluid.ParamAttr(name="SparseFeatFactors", initializer=fluid.initializer.Normal(scale=1/math.sqrt(sparse_feature_dim))))
sparse_embed_seq = map(embedding_layer, sparse_input_ids) sparse_embed_seq = map(embedding_layer, sparse_input_ids)
concated = fluid.layers.concat(sparse_embed_seq + [dense_input], axis=1) concated = fluid.layers.concat(sparse_embed_seq + [dense_input], axis=1)
fc1 = fluid.layers.fc(input=concated, size=400, act='relu') fc1 = fluid.layers.fc(input=concated, size=400, act='relu',
fc2 = fluid.layers.fc(input=fc1, size=400, act='relu') param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(concated.shape[1]))))
fc3 = fluid.layers.fc(input=fc2, size=400, act='relu') fc2 = fluid.layers.fc(input=fc1, size=400, act='relu',
predict = fluid.layers.fc(input=fc3, size=2, act='sigmoid') param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(fc1.shape[1]))))
fc3 = fluid.layers.fc(input=fc2, size=400, act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(fc2.shape[1]))))
predict = fluid.layers.fc(input=fc3, size=2, act='softmax',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(fc3.shape[1]))))
data_list = [dense_input] + sparse_input_ids data_list = [dense_input] + sparse_input_ids
......
...@@ -68,17 +68,25 @@ def train(): ...@@ -68,17 +68,25 @@ def train():
place = fluid.CPUPlace() place = fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=data_list, place=place) feeder = fluid.DataFeeder(feed_list=data_list, place=place)
data_name_list = [var.name for var in data_list]
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in range(args.num_passes): for pass_id in range(args.num_passes):
batch_id = 0
for data in train_reader(): for data in train_reader():
loss_val, auc_val, batch_auc_val = exe.run( loss_val, auc_val, batch_auc_val = exe.run(
fluid.default_main_program(), fluid.default_main_program(),
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[loss, auc_var, batch_auc_var] fetch_list=[loss, auc_var, batch_auc_var]
) )
print('loss :' + str(loss_val) + " auc : " + str(auc_val) + " batch_auc : " + str(batch_auc_val)) print('pass:' + str(pass_id) + ' batch:' + str(batch_id) + ' loss: ' + str(loss_val) + " auc: " + str(auc_val) + " batch_auc: " + str(batch_auc_val))
batch_id += 1
if batch_id % 100 == 0 and batch_id != 0:
model_dir = 'output/batch-' + str(batch_id)
fluid.io.save_inference_model(model_dir, data_name_list, [loss, auc_var], exe)
model_dir = 'output/pass-' + str(pass_id)
fluid.io.save_inference_model(model_dir, data_name_list, [loss_var, auc_var], exe)
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册