# train.py (3.3 KB) - committed by overlordmax
import numpy as np
import pandas as pd
import os
import random
import paddle.fluid as fluid
from youtubednn import YoutubeDNN
import paddle
import args
import logging
import time

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)

def train(args):
    """Train a YoutubeDNN network on randomly generated data and export it.

    The function builds the network, optimises it with plain SGD, runs
    ``args.epochs`` passes over 100 randomly generated batches, saves an
    inference model after every epoch and finally appends the contents of
    the ``l4_weight`` variable to a CSV file.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``watch_vec_size``, ``search_vec_size``, ``other_feat_size``,
            ``output_size``, ``base_lr``, ``use_gpu``, ``batch_size``,
            ``epochs``, ``model_dir`` and ``video_vec_path``.
    """
    youtube_model = YoutubeDNN()
    inputs = youtube_model.input_data(args.watch_vec_size, args.search_vec_size, args.other_feat_size)
    loss, acc, l3 = youtube_model.net(inputs, args.output_size, layers=[128, 64, 32])

    sgd = fluid.optimizer.SGD(learning_rate=args.base_lr)
    sgd.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Build a random data set.
    # The float32 casts are done once here instead of on every batch of every
    # epoch; previously they were repeated inside the timed section of the
    # training loop and therefore counted towards the logged batch_time.
    # The order of the np.random calls is kept so that a seeded run produces
    # the same data as before.
    sample_size = 100
    batches = []
    for _ in range(sample_size):
        watch_vec = np.random.rand(args.batch_size, args.watch_vec_size).astype('float32')
        search_vec = np.random.rand(args.batch_size, args.search_vec_size).astype('float32')
        other_feat = np.random.rand(args.batch_size, args.other_feat_size).astype('float32')
        # Already shaped (batch_size, 1), so no further reshape is needed.
        label = np.random.randint(args.output_size, size=(args.batch_size, 1))
        batches.append((watch_vec, search_vec, other_feat, label))

    main_program = fluid.default_main_program()
    feed_var_names = ["watch_vec", "search_vec", "other_feat"]
    fetch_vars = [l3]

    for epoch in range(args.epochs):
        for watch_vec, search_vec, other_feat, label in batches:
            begin = time.time()
            # A fresh feed dict is built per step so the executor never sees
            # a dict it has handled before.
            loss_data, acc_val = exe.run(main_program,
                                feed={
                                    "watch_vec": watch_vec,
                                    "search_vec": search_vec,
                                    "other_feat": other_feat,
                                    "label": label
                                },
                                return_numpy=True,
                                fetch_list=[loss.name, acc.name])
            end = time.time()
            logger.info("epoch_id: {}, batch_time: {:.5f}s, loss: {:.5f}, acc: {:.5f}".format(
                epoch, end-begin, float(np.array(loss_data)), np.array(acc_val)[0]))

        # Save an inference model for this epoch (directories are 1-based).
        model_dir = os.path.join(args.model_dir, 'epoch_' + str(epoch + 1), "checkpoint")
        fluid.io.save_inference_model(model_dir, feed_var_names, fetch_vars, exe)

    # Save all video vectors.
    # NOTE(review): 'l4_weight' is presumably created inside YoutubeDNN.net;
    # confirm the parameter name there if this lookup ever returns None.
    video_array = np.array(fluid.global_scope().find_var('l4_weight').get_tensor())
    video_vec = pd.DataFrame(video_array)
    # Append mode: the caller is expected to remove a stale file beforehand.
    video_vec.to_csv(args.video_vec_path, mode="a", index=False, header=0)

if __name__ == "__main__":
    # Script entry point: parse the command-line options, log them, clear any
    # previous video-vector output and start training.
    # NOTE: this rebinds the module-level name ``args`` from the imported
    # ``args`` module to the parsed namespace.
    args = args.parse_args()
    logger.info("use_gpu: {}, batch_size: {}, epochs: {}, watch_vec_size: {}, search_vec_size: {}, other_feat_size: {}, output_size: {}, model_dir: {}, test_epoch: {}, base_lr: {}, video_vec_path: {}".format(
        args.use_gpu, args.batch_size, args.epochs, args.watch_vec_size, args.search_vec_size, args.other_feat_size, args.output_size, args.model_dir, args.test_epoch, args.base_lr, args.video_vec_path))

    # train() appends to this CSV, so a file left over from an earlier run has
    # to be removed first. os.remove is used instead of shelling out to ``rm``
    # so that paths containing spaces or shell metacharacters are handled
    # safely and the script is not tied to a POSIX shell.
    if os.path.exists(args.video_vec_path):
        os.remove(args.video_vec_path)
    train(args)