# local_train.py
# Single-machine CPU training script for the CTR DeepFM network, using the
# PaddlePaddle fluid dataset API.
from __future__ import print_function

from args import parse_args
import os
import paddle.fluid as fluid
import sys
from network_confv6 import ctr_deepfm_dataset

# Number of sparse "context<i>" input slots declared for the network.
NUM_CONTEXT_FEATURE = 22
# Width of the user_profile input (that input is currently commented out in
# train()).
DIM_USER_PROFILE = 10
# Width of the "dense_feature" float input.
DIM_DENSE_FEATURE = 3

# Interpreter used to launch map_reader.py in the dataset pipe command.
# Previously hard-coded to one developer's home directory; now it can be
# overridden through the PYTHON_PATH environment variable and otherwise
# defaults to the interpreter running this script.
PYTHON_PATH = os.environ.get("PYTHON_PATH") or sys.executable or "python"


def train():
    """Train the CTR DeepFM network on CPU and save a model after each epoch.

    Command-line options come from ``parse_args()``; this function reads
    ``model_output_dir``, ``embedding_size``, ``sparse_feature_dim`` and
    ``batch_size`` from the result.

    The training data is read through the fluid dataset API from the files
    ``./out/normed_train00`` .. ``./out/normed_train11``; every instance is
    piped through ``map_reader.py``. After each epoch an inference model is
    written to ``<model_output_dir>/epoch<N>.model``.
    """
    args = parse_args()
    # makedirs rather than mkdir so a nested output path can be created.
    if not os.path.isdir(args.model_output_dir):
        os.makedirs(args.model_output_dir)

    # Set the input format for our model. Note that you need to carefully
    # modify these when you define a new network.
    # user_profile = fluid.layers.data(
    #     name="user_profile", shape=[DIM_USER_PROFILE], dtype='int64',
    #     lod_level=1)
    dense_feature = fluid.layers.data(
        name="dense_feature", shape=[DIM_DENSE_FEATURE], dtype='float32')
    context_feature = [
        fluid.layers.data(
            name="context" + str(i), shape=[1], lod_level=1, dtype="int64")
        for i in range(0, NUM_CONTEXT_FEATURE)
    ]
    context_feature_fm = fluid.layers.data(
        name="context_fm", shape=[1], dtype='int64', lod_level=1)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    print("ready to network")
    # Self-defined network.
    loss, auc_var, batch_auc_var, accuracy, predict = ctr_deepfm_dataset(
        dense_feature, context_feature, context_feature_fm, label,
        args.embedding_size, args.sparse_feature_dim)

    print("ready to optimize")
    optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
    optimizer.minimize(loss)
    # Single-machine CPU training. For more training options please visit
    # the PaddlePaddle site.
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # Use the dataset API for much faster speed.
    dataset = fluid.DatasetFactory().create_dataset()
    dataset.set_use_var([dense_feature] + context_feature +
                        [context_feature_fm] + [label])
    # map_reader.py defines how generated training instances are processed.
    pipe_command = PYTHON_PATH + "  map_reader.py %d" % args.sparse_feature_dim
    dataset.set_pipe_command(pipe_command)
    dataset.set_batch_size(args.batch_size)
    thread_num = 1
    dataset.set_thread(thread_num)

    # Training files are expected to be produced by splitting the normalized
    # data, for example:
    #   "split -a 2 -d -l 200000 normed_train.txt normed_train"
    # which yields two-digit, zero-padded suffixes (00, 01, ...), hence %02d.
    whole_filelist = ["./out/normed_train%02d" % idx for idx in range(12)]

    print("ready to epochs")
    epochs = 10
    for i in range(epochs):
        print("start %dth epoch" % i)
        dataset.set_filelist(whole_filelist)
        # Print the information you want by setting fetch_list and fetch_info.
        exe.train_from_dataset(
            program=fluid.default_main_program(),
            dataset=dataset,
            fetch_list=[auc_var, accuracy, predict, label],
            fetch_info=["auc", "accuracy", "predict", "label"],
            debug=False)
        # The file name must be relative: a leading "/" would make
        # os.path.join discard model_output_dir and write to the root.
        model_dir = os.path.join(args.model_output_dir,
                                 'epoch' + str(i + 1) + ".model")
        sys.stderr.write("epoch%d finished\n" % (i + 1))
        # Save the model for this epoch.
        fluid.io.save_inference_model(
            model_dir,
            [dense_feature.name] + [x.name for x in context_feature] +
            [context_feature_fm.name] + [label.name],
            [loss, auc_var, accuracy, predict, label], exe)


# Run training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    train()