提交 99dcaf65 编写于 作者: S SunGaofeng

add data preprocessing of nonlocal model

上级 1ca03187
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import sys
import numpy as np
import random
def replace_folder_in_list(src, outlist, original_folder, replace_folder):
    """Copy the list file ``src`` to ``outlist`` with a folder substituted.

    Every occurrence of ``original_folder`` in each line of ``src`` is
    replaced by ``replace_folder``; everything else, including line endings,
    is written through unchanged.

    Args:
        src: path of the list file to read.
        outlist: path of the list file to write (truncated if it exists).
        original_folder: substring to look for in every line.
        replace_folder: text substituted for ``original_folder``.
    """
    # Read the whole input before opening the output, so that ``src`` and
    # ``outlist`` may safely be the same path.
    with open(src, 'r') as infile:
        lines = infile.readlines()
    with open(outlist, 'w') as outfile:
        outfile.writelines(
            line.replace(original_folder, replace_folder) for line in lines)


if __name__ == '__main__':
    # Example arguments:
    #   src             = 'trainlist_download.txt'
    #   outlist         = 'trainlist.txt'
    #   original_folder = '/nfs.yoda/xiaolonw/kinetics/data/train'
    #   replace_folder  = '/scratch/xiaolonw/kinetics/data/compress/train_256'
    if len(sys.argv) != 5:
        # Explicit check instead of ``assert`` so it still runs under -O.
        sys.exit('usage: python {} SRC OUTLIST ORIGINAL_FOLDER REPLACE_FOLDER'
                 .format(sys.argv[0]))
    src = sys.argv[1]
    outlist = sys.argv[2]
    original_folder = sys.argv[3]
    replace_folder = sys.argv[4]
    replace_folder_in_list(src, outlist, original_folder, replace_folder)
import os
import numpy as np
import sys
num_classes = 400
replace_space_by_underliner = True  # whether to replace space by '_' in labels

# Positional command-line arguments (example values in the trailing comments).
fn = sys.argv[1]  # 'trainlist_download400.txt'
train_dir = sys.argv[2]  # '/docker_mount/data/k400/Kinetics_trimmed_processed_train'
val_dir = sys.argv[3]  # '/docker_mount/data/k400/Kinetics_trimmed_processed_val'
trainlist = sys.argv[4]  # 'trainlist.txt'
vallist = sys.argv[5]  # 'vallist.txt'

# Read the annotation file, dropping blank lines; ``with`` guarantees the
# handle is closed even if reading fails.
with open(fn) as annotation_file:
    fl = [line.strip() for line in annotation_file if line.strip() != '']

# The first line is skipped (presumably a header row -- confirm against the
# annotation file). The action name is the first comma-separated field with
# surrounding double quotes removed. Names are de-duplicated and sorted
# BEFORE spaces are replaced, so the label order follows the original names.
action_list = sorted({line.split(',')[0].strip('\"') for line in fl[1:]})
if replace_space_by_underliner:
    action_list = [item.replace(' ', '_') for item in action_list]

# assign integer label to each category, abseiling is labeled as 0,
# zumba labeled as 399 and so on, sorted by the category name
action_label_dict = {key: i for i, key in enumerate(action_list)}
assert len(action_label_dict) == num_classes, \
    "action num should be {}".format(num_classes)
def generate_file(Faction_label_dict, Ftrain_dir, Ftrainlist, Fnum_classes):
    """Write a ``<video path> <label>`` list for one dataset directory.

    ``Ftrain_dir`` must contain exactly ``Fnum_classes`` entries, one per
    action; each is listed and every entry inside it becomes one output line
    ``<Ftrain_dir>/<action>/<file> <label>``.

    Args:
        Faction_label_dict: mapping from action (sub-directory) name to its
            integer label.
        Ftrain_dir: directory holding one sub-directory per action.
        Ftrainlist: path of the list file to write (truncated if it exists).
        Fnum_classes: expected number of action sub-directories.

    Raises:
        AssertionError: if the number of sub-directories differs from
            ``Fnum_classes`` or a sub-directory name is missing from
            ``Faction_label_dict``.
    """
    actions = sorted(os.listdir(Ftrain_dir))
    assert len(actions) == Fnum_classes, \
        "train action num should be {}".format(Fnum_classes)

    # ``with`` closes the list file even when an assertion or a directory
    # listing fails part-way through.
    with open(Ftrainlist, 'w') as list_file:
        for action in actions:
            assert action in Faction_label_dict, \
                "action {} should be in action_dict".format(action)
            action_dir = os.path.join(Ftrain_dir, action)
            label = str(Faction_label_dict[action])
            # os.listdir returns names in arbitrary order; sort them so the
            # generated list is reproducible across runs and machines.
            for video_name in sorted(os.listdir(action_dir)):
                video_path = os.path.join(action_dir, video_name)
                list_file.write(video_path + ' ' + label + '\n')
# Write the training list first, then the validation list; both use the same
# action-to-label mapping and the same expected class count.
for data_dir, list_path in ((train_dir, trainlist), (val_dir, vallist)):
    generate_file(action_label_dict, data_dir, list_path, num_classes)
# Name of the downloaded Kinetics-400 training annotation file
TRAINLIST_DOWNLOAD="kinetics-400_train.csv"

# Paths of the train and validation data
TRAIN_DIR="/home/sungaofeng/docker/dockermount/data/compress/train_256"
VALID_DIR="/home/sungaofeng/docker/dockermount/data/compress/val_256"

# Generate trainlist.txt and vallist.txt.
# Arguments: annotation csv, train dir, validation dir, train list, validation list.
# The expansions are quoted so that paths containing spaces are passed as a
# single argument instead of being word-split.
python generate_filelist.py "$TRAINLIST_DOWNLOAD" "$TRAIN_DIR" "$VALID_DIR" trainlist.txt vallist.txt

# Generate the test list
python generate_testlist_multicrop.py
import os
vallist = 'vallist.txt'
testlist = 'testlist.txt'
sampling_times = 10
cropping_times = 3


def generate_testlist(vallist, testlist, sampling_times, cropping_times):
    """Expand a validation list into a multi-sample, multi-crop test list.

    For every non-blank line of ``vallist`` the first space-separated field
    is taken as the file name, and one line
    ``<file name> <video index> <sample index> <crop index>`` is written to
    ``testlist`` for each (sample, crop) combination. The video index is the
    zero-based position of the entry among the non-blank lines of
    ``vallist``; the label stored in ``vallist`` is not written.

    Args:
        vallist: path of the validation list to read.
        testlist: path of the test list to write (truncated if it exists).
        sampling_times: number of sample indices emitted per video.
        cropping_times: number of crop indices emitted per sample.
    """
    with open(vallist) as val_file:
        entries = [line.strip() for line in val_file if line.strip() != '']

    with open(testlist, 'w') as f_test:
        for i, entry in enumerate(entries):
            fn = entry.split(' ')[0]
            for j in range(sampling_times):
                for k in range(cropping_times):
                    f_test.write(
                        fn + ' ' + str(i) + ' ' + str(j) + ' ' + str(k) + '\n')


if __name__ == '__main__':
    generate_testlist(vallist, testlist, sampling_times, cropping_times)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册