Commit 0f3eeda1 authored by Qiao Longfei

update train and test

Parent cc56cb2e
@@ -2,17 +2,19 @@ import os
import gzip
import argparse
import itertools
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
from network_conf import DeepFM
import reader
def parse_args():
parser = argparse.ArgumentParser(description="PaddlePaddle DeepFM example")
parser.add_argument(
'--model_gz_path',
'--model_path',
type=str,
required=True,
help="The path of model parameters gz file")
@@ -21,11 +23,6 @@ def parse_args():
type=str,
required=True,
help="The path of the dataset to infer")
parser.add_argument(
'--prediction_output_path',
type=str,
required=True,
help="The path to output the prediction")
parser.add_argument(
'--factor_size',
type=int,
@@ -38,25 +35,43 @@ def parse_args():
def infer():
args = parse_args()
paddle.init(use_gpu=False, trainer_count=1)
model = DeepFM(args.factor_size, infer=True)
parameters = paddle.parameters.Parameters.from_tar(
gzip.open(args.model_gz_path, 'r'))
inferer = paddle.inference.Inference(
output_layer=model, parameters=parameters)
place = fluid.CPUPlace()
inference_scope = fluid.core.Scope()
dataset = reader.Dataset()
infer_reader = paddle.batch(dataset.infer(args.data_path), batch_size=1000)
with open(args.prediction_output_path, 'w') as out:
for id, batch in enumerate(infer_reader()):
res = inferer.infer(input=batch)
predictions = [x for x in itertools.chain.from_iterable(res)]
out.write('\n'.join(map(str, predictions)) + '\n')
test_reader = paddle.batch(dataset.train(args.data_path), batch_size=1000)
startup_program = fluid.framework.Program()
test_program = fluid.framework.Program()
with fluid.framework.program_guard(test_program, startup_program):
loss, data_list, auc_var, batch_auc_var = DeepFM(args.factor_size)
exe = fluid.Executor(place)
#exe.run(startup_program)
feeder = fluid.DataFeeder(feed_list=data_list, place=place)
with fluid.scope_guard(inference_scope):
[inference_program, _, fetch_targets] = fluid.io.load_inference_model(args.model_path, exe)
print(fetch_targets)
def set_zero(var_name):
param = inference_scope.var(var_name).get_tensor()
param_array = np.zeros(param._get_dims()).astype("int64")
param.set(param_array, place)
auc_states_names = ['_generated_var_2', '_generated_var_3']
for name in auc_states_names:
set_zero(name)
batch_id = 0
for data in test_reader():
loss_val, auc_val = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=fetch_targets)
if batch_id % 100 == 0:
print("loss: " + str(loss_val) + " auc_val:" + str(auc_val))
batch_id += 1
if __name__ == '__main__':
......
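For context on the change above: infer() now loads a directory written by fluid.io.save_inference_model instead of unpacking a gzipped v2 parameter tarball, and evaluates loss/AUC over the test reader rather than writing raw predictions to a file. Below is a minimal sketch of that fluid inference pattern, not part of the commit; the helper name run_eval and its arguments are assumptions, and `feeder` is expected to be a fluid.DataFeeder built over the model's input variables as in the diff.

```python
import paddle.fluid as fluid

# Sketch (not part of the diff) of the fluid inference flow used in infer():
# a directory written by fluid.io.save_inference_model is reloaded and run
# batch by batch on CPU. run_eval is a hypothetical helper; `feeder` is
# assumed to be a fluid.DataFeeder over the model's input variables and
# `batches` an iterable of batched samples, as in the diff above.
def run_eval(model_dir, feeder, batches):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.core.Scope()
    with fluid.scope_guard(scope):
        # load_inference_model returns the pruned program, the feed variable
        # names, and the fetch targets saved at training time (loss and AUC here)
        program, _, fetch_targets = fluid.io.load_inference_model(model_dir, exe)
        for batch in batches:
            loss_val, auc_val = exe.run(program,
                                        feed=feeder.feed(batch),
                                        fetch_list=fetch_targets)
            print("loss: " + str(loss_val) + " auc: " + str(auc_val))
```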
import paddle.fluid as fluid
import math
dense_feature_dim = 13
sparse_feature_dim = 117568
@@ -17,15 +18,19 @@ def DeepFM(factor_size, infer=False):
return fluid.layers.embedding(
input=input,
size=[sparse_feature_dim, factor_size],
param_attr=fluid.ParamAttr(name="SparseFeatFactors"))
param_attr=fluid.ParamAttr(name="SparseFeatFactors", initializer=fluid.initializer.Normal(scale=1/math.sqrt(sparse_feature_dim))))
sparse_embed_seq = map(embedding_layer, sparse_input_ids)
concated = fluid.layers.concat(sparse_embed_seq + [dense_input], axis=1)
fc1 = fluid.layers.fc(input=concated, size=400, act='relu')
fc2 = fluid.layers.fc(input=fc1, size=400, act='relu')
fc3 = fluid.layers.fc(input=fc2, size=400, act='relu')
predict = fluid.layers.fc(input=fc3, size=2, act='sigmoid')
fc1 = fluid.layers.fc(input=concated, size=400, act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(concated.shape[1]))))
fc2 = fluid.layers.fc(input=fc1, size=400, act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(fc1.shape[1]))))
fc3 = fluid.layers.fc(input=fc2, size=400, act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(fc2.shape[1]))))
predict = fluid.layers.fc(input=fc3, size=2, act='softmax',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(scale=1/math.sqrt(fc3.shape[1]))))
data_list = [dense_input] + sparse_input_ids
......
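The network change above attaches a Normal initializer whose standard deviation scales as 1/sqrt(fan_in) to the embedding and to each fc layer, and switches the output layer from a 2-unit sigmoid to a 2-way softmax. A small sketch of the fan-in-scaled fc pattern, not part of the commit; the helper name scaled_fc is hypothetical.

```python
import math
import paddle.fluid as fluid

# Sketch (not part of the diff) of the fan-in-scaled initialization used above:
# weights are drawn from a Normal distribution with std = 1/sqrt(fan_in), where
# fan_in is the width of the layer's input. scaled_fc is an assumed helper name.
def scaled_fc(x, size, act='relu'):
    fan_in = x.shape[1]
    return fluid.layers.fc(
        input=x,
        size=size,
        act=act,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Normal(scale=1.0 / math.sqrt(fan_in))))

# Usage mirroring the diff: fc1 = scaled_fc(concated, 400),
# fc2 = scaled_fc(fc1, 400), fc3 = scaled_fc(fc2, 400).
```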
@@ -68,17 +68,25 @@ def train():
place = fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=data_list, place=place)
data_name_list = [var.name for var in data_list]
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for pass_id in range(args.num_passes):
batch_id = 0
for data in train_reader():
loss_val, auc_val, batch_auc_val = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss, auc_var, batch_auc_var]
)
print('loss :' + str(loss_val) + " auc : " + str(auc_val) + " batch_auc : " + str(batch_auc_val))
print('pass:' + str(pass_id) + ' batch:' + str(batch_id) + ' loss: ' + str(loss_val) + " auc: " + str(auc_val) + " batch_auc: " + str(batch_auc_val))
batch_id += 1
if batch_id % 100 == 0 and batch_id != 0:
model_dir = 'output/batch-' + str(batch_id)
fluid.io.save_inference_model(model_dir, data_name_list, [loss, auc_var], exe)
model_dir = 'output/pass-' + str(pass_id)
fluid.io.save_inference_model(model_dir, data_name_list, [loss, auc_var], exe)
if __name__ == '__main__':
......
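The training loop above now prints the pass and batch ids alongside the metrics and checkpoints via fluid.io.save_inference_model every 100 batches plus once per pass; these output/ directories are what infer() later consumes through --model_path. A hedged sketch of that checkpoint policy, not part of the commit; the helper names are hypothetical.

```python
import paddle.fluid as fluid

# Sketch (not part of the diff) of the checkpoint cadence added to train():
# an inference model is exported every `save_every` batches and once more at
# the end of each pass. Helper names and the default cadence are illustrative.
def save_batch_checkpoint(exe, feed_names, targets, batch_id, save_every=100):
    if batch_id != 0 and batch_id % save_every == 0:
        fluid.io.save_inference_model('output/batch-' + str(batch_id),
                                      feed_names, targets, exe)

def save_pass_checkpoint(exe, feed_names, targets, pass_id):
    fluid.io.save_inference_model('output/pass-' + str(pass_id),
                                  feed_names, targets, exe)
```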