From 99dcaf65102d9063e6610bd07b251661b6db8d43 Mon Sep 17 00:00:00 2001
From: SunGaofeng
Date: Fri, 5 Apr 2019 06:26:19 +0000
Subject: [PATCH] add data preprocessing of nonlocal model

---
 .../video/dataset/nonlocal/change_filelist.py | 37 +++++++++++
 .../dataset/nonlocal/generate_filelist.py     | 65 +++++++++++++++++++
 .../video/dataset/nonlocal/generate_list.sh   | 12 ++++
 .../nonlocal/generate_testlist_multicrop.py   | 21 ++++++
 4 files changed, 135 insertions(+)
 create mode 100644 PaddleCV/video/dataset/nonlocal/change_filelist.py
 create mode 100644 PaddleCV/video/dataset/nonlocal/generate_filelist.py
 create mode 100644 PaddleCV/video/dataset/nonlocal/generate_list.sh
 create mode 100644 PaddleCV/video/dataset/nonlocal/generate_testlist_multicrop.py

diff --git a/PaddleCV/video/dataset/nonlocal/change_filelist.py b/PaddleCV/video/dataset/nonlocal/change_filelist.py
new file mode 100644
index 00000000..0426164f
--- /dev/null
+++ b/PaddleCV/video/dataset/nonlocal/change_filelist.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+#
+
+import sys
+import numpy as np
+import random
+
+# src = 'trainlist_download.txt'
+# outlist = 'trainlist.txt'
+# original_folder = '/nfs.yoda/xiaolonw/kinetics/data/train'
+# replace_folder = '/scratch/xiaolonw/kinetics/data/compress/train_256'
+assert (len(sys.argv) == 5)
+
+src = sys.argv[1]
+outlist = sys.argv[2]
+original_folder = sys.argv[3]
+replace_folder = sys.argv[4]
+
+f = open(src, 'r')
+flist = []
+for line in f:
+    flist.append(line)
+f.close()
+
+f2 = open(outlist, 'w')
+
+listlen = len(flist)
+for i in range(listlen):
+    line = flist[i]
+    line = line.replace(original_folder, replace_folder)
+    f2.write(line)
+
+f2.close()
diff --git a/PaddleCV/video/dataset/nonlocal/generate_filelist.py b/PaddleCV/video/dataset/nonlocal/generate_filelist.py
new file mode 100644
index 00000000..c69ac952
--- /dev/null
+++ b/PaddleCV/video/dataset/nonlocal/generate_filelist.py
@@ -0,0 +1,65 @@
+import os
+import numpy as np
+import sys
+
+num_classes = 400
+replace_space_by_underliner = True  # whether to replace space by '_' in labels
+
+fn = sys.argv[1]  #'trainlist_download400.txt'
+train_dir = sys.argv[
+    2]  #'/docker_mount/data/k400/Kinetics_trimmed_processed_train'
+val_dir = sys.argv[3]  #'/docker_mount/data/k400/Kinetics_trimmed_processed_val'
+trainlist = sys.argv[4]  #'trainlist.txt'
+vallist = sys.argv[5]  #'vallist.txt'
+
+fl = open(fn).readlines()
+fl = [line.strip() for line in fl if line.strip() != '']
+action_list = []
+
+for line in fl[1:]:
+    act = line.split(',')[0].strip('\"')
+    action_list.append(act)
+
+action_set = set(action_list)
+action_list = list(action_set)
+action_list.sort()
+if replace_space_by_underliner:
+    action_list = [item.replace(' ', '_') for item in action_list]
+
+# assign integer label to each category, abseiling is labeled as 0,
+# zumba labeled as 399 and so on, sorted by the category name
+action_label_dict = {}
+for i in range(len(action_list)):
+    key = action_list[i]
+    action_label_dict[key] = i
+
+assert len(action_label_dict.keys(
+)) == num_classes, "action num should be {}".format(num_classes)
+
+
+def generate_file(Faction_label_dict, Ftrain_dir, Ftrainlist, Fnum_classes):
+    trainactions = os.listdir(Ftrain_dir)
+    trainactions.sort()
+    assert len(
+        trainactions) == Fnum_classes, "train action num should be {}".format(
+            Fnum_classes)
+
+    train_items = []
+    trainlist_outfile = open(Ftrainlist, 'w')
+    for trainaction in trainactions:
+        assert trainaction in Faction_label_dict.keys(
+        ), "action {} should be in action_dict".format(trainaction)
+        trainaction_dir = os.path.join(Ftrain_dir, trainaction)
+        trainaction_label = Faction_label_dict[trainaction]
+        trainaction_files = os.listdir(trainaction_dir)
+        for f in trainaction_files:
+            fn = os.path.join(trainaction_dir, f)
+            item = fn + ' ' + str(trainaction_label)
+            train_items.append(item)
+            trainlist_outfile.write(item + '\n')
+    trainlist_outfile.flush()
+    trainlist_outfile.close()
+
+
+generate_file(action_label_dict, train_dir, trainlist, num_classes)
+generate_file(action_label_dict, val_dir, vallist, num_classes)
diff --git a/PaddleCV/video/dataset/nonlocal/generate_list.sh b/PaddleCV/video/dataset/nonlocal/generate_list.sh
new file mode 100644
index 00000000..77b59a52
--- /dev/null
+++ b/PaddleCV/video/dataset/nonlocal/generate_list.sh
@@ -0,0 +1,12 @@
+# Download txt name
+TRAINLIST_DOWNLOAD="kinetics-400_train.csv"
+
+# path of the train and valid data
+TRAIN_DIR="/home/sungaofeng/docker/dockermount/data/compress/train_256"
+VALID_DIR="/home/sungaofeng/docker/dockermount/data/compress/val_256"
+
+python generate_filelist.py $TRAINLIST_DOWNLOAD $TRAIN_DIR $VALID_DIR trainlist.txt vallist.txt
+
+# generate test list
+python generate_testlist_multicrop.py
+
diff --git a/PaddleCV/video/dataset/nonlocal/generate_testlist_multicrop.py b/PaddleCV/video/dataset/nonlocal/generate_testlist_multicrop.py
new file mode 100644
index 00000000..f2d9b86c
--- /dev/null
+++ b/PaddleCV/video/dataset/nonlocal/generate_testlist_multicrop.py
@@ -0,0 +1,21 @@
+import os
+
+vallist = 'vallist.txt'
+testlist = 'testlist.txt'
+sampling_times = 10
+cropping_times = 3
+
+fl = open(vallist).readlines()
+fl = [line.strip() for line in fl if line.strip() != '']
+f_test = open(testlist, 'w')
+
+for i in range(len(fl)):
+    line = fl[i].split(' ')
+    fn = line[0]
+    label = line[1]
+    for j in range(sampling_times):
+        for k in range(cropping_times):
+            test_item = fn + ' ' + str(i) + ' ' + str(j) + ' ' + str(k) + '\n'
+            f_test.write(test_item)
+
+f_test.close()
--
GitLab
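
Note for anyone trying the patch locally: generate_list.sh is the entry point. Edit TRAINLIST_DOWNLOAD, TRAIN_DIR and VALID_DIR to match the local Kinetics-400 layout, then run it to produce trainlist.txt / vallist.txt (one "<video_path> <label>" per line) and testlist.txt (one "<video_path> <video_idx> <sampling_idx> <crop_idx>" per line, 10 temporal samplings x 3 spatial crops = 30 entries per validation clip). change_filelist.py can rewrite the folder prefix in an existing list if the videos are later moved. The sketch below is not part of the patch; it is a minimal, assumed consumer of the list formats written by generate_filelist.py and generate_testlist_multicrop.py, shown only to make those formats concrete (the parse_* helper names are illustrative).

# Minimal sketch (not part of this patch): read back the generated lists,
# assuming the formats produced by the scripts above.


def parse_label_item(line):
    # trainlist.txt / vallist.txt line: "<video_path> <integer_label>"
    path, label = line.rsplit(' ', 1)
    return path, int(label)


def parse_test_item(line):
    # testlist.txt line: "<video_path> <video_idx> <sampling_idx> <crop_idx>"
    path, vid, samp, crop = line.rsplit(' ', 3)
    return path, int(vid), int(samp), int(crop)


if __name__ == '__main__':
    with open('vallist.txt') as f:
        val_items = [parse_label_item(l.strip()) for l in f if l.strip()]
    with open('testlist.txt') as f:
        test_items = [parse_test_item(l.strip()) for l in f if l.strip()]
    # sampling_times=10 and cropping_times=3 expand every validation clip
    # into 30 test entries
    assert len(test_items) == 10 * 3 * len(val_items)
    print('val clips: {}, test entries: {}'.format(
        len(val_items), len(test_items)))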