Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
weixin_51232023
models
提交
dbc24a3b
M
models
项目概览
weixin_51232023
/
models
与 Fork 源项目一致
Fork自
PaddlePaddle / models
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
dbc24a3b
编写于
2月 02, 2019
作者:
Q
qingqing01
提交者:
GitHub
2月 02, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add PaddleCV/video/dataset (#1743)
上级
68ad52d4
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
400 addition
and
1 deletion
+400
-1
fluid/PaddleCV/video/.gitignore
fluid/PaddleCV/video/.gitignore
+0
-1
fluid/PaddleCV/video/dataset/kinetics/README.md
fluid/PaddleCV/video/dataset/kinetics/README.md
+5
-0
fluid/PaddleCV/video/dataset/kinetics/generate_label.py
fluid/PaddleCV/video/dataset/kinetics/generate_label.py
+31
-0
fluid/PaddleCV/video/dataset/kinetics/video2pkl.py
fluid/PaddleCV/video/dataset/kinetics/video2pkl.py
+84
-0
fluid/PaddleCV/video/dataset/youtube8m/README.md
fluid/PaddleCV/video/dataset/youtube8m/README.md
+2
-0
fluid/PaddleCV/video/dataset/youtube8m/tf2pkl.py
fluid/PaddleCV/video/dataset/youtube8m/tf2pkl.py
+278
-0
fluid/PaddleCV/video/dataset/youtube8m/yt8m_pca/eigenvals.npy
...d/PaddleCV/video/dataset/youtube8m/yt8m_pca/eigenvals.npy
+0
-0
未找到文件。
fluid/PaddleCV/video/.gitignore
浏览文件 @
dbc24a3b
dataset
checkpoints
output*
*.pyc
...
...
fluid/PaddleCV/video/dataset/kinetics/README.md
0 → 100644
浏览文件 @
dbc24a3b
1.
download kinetics-400_train.csv and kinetics-400_val.csv
2.
ffmpeg is required to decode mp4
3.
convert the mp4 videos to pkl files; each pkl stores the tuple (video_id, label, images)
python generate_label.py kinetics-400_train.csv kinetics400_label.txt # generate label file
python video2pkl.py kinetics-400_train.csv $Source_dir $Target_dir $NUM_THREADS
fluid/PaddleCV/video/dataset/kinetics/generate_label.py
0 → 100644
浏览文件 @
dbc24a3b
import sys

# kinetics-400_train.csv should be downloaded first and passed as sys.argv[1]
# sys.argv[2] can be set as kinetics400_label.txt
# python generate_label.py kinetics-400_train.csv kinetics400_label.txt

num_classes = 400


def collect_labels(lines):
    """Return the sorted list of distinct labels found in CSV data lines.

    Args:
        lines: iterable of CSV lines (header already removed). The label is
            the first comma-separated field, with surrounding double quotes
            stripped.

    Returns:
        A sorted list of the unique label strings.
    """
    # A set keeps de-duplication linear instead of scanning a list per line.
    labels = {line.strip().split(',')[0].strip('"') for line in lines}
    return sorted(labels)


def main(fname, outname):
    """Write one "<label> <index>" line per class of ``fname`` to ``outname``.

    Args:
        fname: path of the Kinetics CSV file; its first line is skipped as
            the header.
        outname: path of the label file to create.

    Raises:
        AssertionError: if the CSV does not contain exactly ``num_classes``
            distinct labels.
    """
    with open(fname) as csv_file:
        rows = csv_file.readlines()[1:]

    label_list = collect_labels(rows)
    assert len(label_list) == num_classes, \
        "there should be {} labels in list, but got {}".format(
            num_classes, len(label_list))

    # Open the output only after validation so a bad CSV does not leave an
    # empty label file behind.
    with open(outname, 'w') as outf:
        for index, label in enumerate(label_list):
            outf.write('{} {}'.format(label, index) + '\n')


if __name__ == '__main__':
    main(sys.argv[1], sys.argv[2])
fluid/PaddleCV/video/dataset/kinetics/video2pkl.py
0 → 100644
浏览文件 @
dbc24a3b
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import
os
import
sys
import
glob
import
cPickle
from
multiprocessing
import
Pool
# example command line:
#   python video2pkl.py kinetics-400_train.csv $Source_dir $Target_dir $NUM_THREADS
#
# kinetics-400_train.csv is the training set file of the K400 official release;
# each line contains label,youtube_id,time_start,time_end,split,is_cc
# Expected arguments: <csv file> <source dir> <target dir> <num threads>
assert (len(sys.argv) == 5)

# Parse the CSV into lists of fields and drop the header row.
with open(sys.argv[1]) as f:
    all_video_entries = [x.strip().split(',') for x in f.readlines()][1:]

source_dir, target_dir, num_threads = sys.argv[2:5]

# Each line of the label file is "<category> <index>"; the category itself
# may contain spaces, so only the last space-separated token is the index.
category_label_map = {}
with open('kinetics400_label.txt') as f:
    for line in f:
        ens = line.strip().split(' ')
        category_label_map[" ".join(ens[:-1])] = int(ens[-1])
def generate_pkl(entry):
    """Extract the frames of one video clip and store them in a pickle file.

    The clip is looked up at
    ``<source_dir>/<category>/<youtube_id>_<start>_<end>.mp4`` (start and end
    zero-padded to six digits), decoded to JPEG frames with ffmpeg in a
    temporary directory under the current working directory, and written to
    ``<target_dir>/<youtube_id>_<start>_<end>.pkl`` as the tuple
    ``(vid, label, images)`` where ``images`` is the list of raw JPEG bytes
    in sorted frame order.

    Args:
        entry: one CSV row split on ','. entry[0] is the category (optionally
            double-quoted), entry[1] the youtube id, entry[2] and entry[3]
            the start and end times.

    Returns:
        None. A missing video is reported on stdout and skipped.

    Raises:
        KeyError: if the category is not present in ``category_label_map``.
    """
    import shutil
    import subprocess

    category = entry[0].strip('"')
    clip_name = entry[1] + "_%06d" % int(entry[2]) + "_%06d" % int(entry[3])
    video_path = os.path.join(source_dir, category, clip_name + ".mp4")
    label = category_label_map[category]

    # Temporary frame directory; the same string is stored as the video id.
    vid = './' + clip_name

    if not os.path.exists(video_path):
        print("File not exists {}".format(video_path))
        return

    if not os.path.exists(vid):
        os.makedirs(vid)
        # Pass an argument list instead of a shell string: category
        # directories may contain spaces or shell metacharacters.
        subprocess.call(
            ['ffmpeg', '-i', video_path, '-q', '0',
             os.path.join(vid, '%06d.jpg')])

    ims = []
    for img in sorted(glob.glob(vid + '/*.jpg')):
        # JPEG data is binary; read it as bytes.
        with open(img, 'rb') as frame_file:
            ims.append(frame_file.read())

    output_pkl = os.path.join(target_dir, vid + ".pkl")
    # Protocol -1 selects a binary pickle format, so the file must be
    # opened in binary mode.
    with open(output_pkl, 'wb') as pkl_file:
        cPickle.dump((vid, label, ims), pkl_file, -1)

    # Equivalent of `rm -rf`: remove the frame directory, ignoring errors.
    shutil.rmtree(vid, ignore_errors=True)
# Guard the pool creation so that worker processes which re-import this
# module (spawn start method) do not try to start pools of their own.
if __name__ == '__main__':
    pool = Pool(processes=int(sys.argv[4]))
    try:
        # Convert every CSV entry; blocks until all entries are processed.
        pool.map(generate_pkl, all_video_entries)
    finally:
        # Always release the worker processes, even if a task raised.
        pool.close()
        pool.join()
fluid/PaddleCV/video/dataset/youtube8m/README.md
0 → 100644
浏览文件 @
dbc24a3b
1.
Tensorflow is required to process tfrecords
2.
python tf2pkl.py $Source_dir $Target_dir
fluid/PaddleCV/video/dataset/youtube8m/tf2pkl.py
0 → 100644
浏览文件 @
dbc24a3b
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
"""Provides readers configured for different datasets."""
import
os
,
sys
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow
import
logging
import
cPickle
from
tensorflow.python.platform
import
gfile
# Usage: python tf2pkl.py <source_dir> <target_dir>
assert (len(sys.argv) == 3)
source_dir, target_dir = sys.argv[1:3]
def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
    """Map byte-quantized feature values back to floats.

    Applies the affine transform ``feat_vector * scale + offset`` with
    ``scale = range / 255`` and ``offset = range / 512 + min_quantized_value``,
    where ``range = max_quantized_value - min_quantized_value``.

    Args:
      feat_vector: the quantized input value(s).
      max_quantized_value: upper end of the quantization range.
      min_quantized_value: lower end of the quantization range.

    Returns:
      The dequantized value(s), with the same shape as feat_vector.

    Raises:
      AssertionError: if max_quantized_value <= min_quantized_value.
    """
    assert max_quantized_value > min_quantized_value
    span = max_quantized_value - min_quantized_value
    scale = span / 255.0
    offset = (span / 512.0) + min_quantized_value
    return feat_vector * scale + offset
def resize_axis(tensor, axis, new_size, fill_value=0):
    """Truncate or pad a tensor so that tensor.shape[axis] == new_size.

    When the axis has to grow, the padding is appended at the end and filled
    with fill_value.

    Args:
      tensor: The tensor to be resized.
      axis: An integer representing the dimension to be resized.
      new_size: An integer or 0d tensor representing the new value for
        tensor.shape[axis].
      fill_value: Value used for any new entries; it is cast to the dtype of
        tensor.

    Returns:
      The resized tensor.
    """
    tensor = tf.convert_to_tensor(tensor)
    dims = tf.unstack(tf.shape(tensor))

    # Shape of the padding block: same as the input except along `axis`,
    # where it is the (non-negative) number of missing entries.
    pad_dims = list(dims)
    pad_dims[axis] = tf.maximum(0, new_size - dims[axis])

    # Shape of the part of the input that is kept.
    dims[axis] = tf.minimum(dims[axis], new_size)
    kept_shape = tf.stack(dims)

    kept = tf.slice(tensor, tf.zeros_like(kept_shape), kept_shape)
    padding = tf.fill(tf.stack(pad_dims), tf.cast(fill_value, tensor.dtype))
    resized = tf.concat([kept, padding], axis)

    # Record the new size in the static shape; as_list() returns a copy.
    static_shape = tensor.get_shape().as_list()
    static_shape[axis] = new_size
    resized.set_shape(static_shape)
    return resized
class BaseReader(object):
    """Base class for readers; new readers should inherit from it."""

    def prepare_reader(self, unused_filename_queue):
        """Create the reader tensors; must be overridden by subclasses.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError
class YT8MFrameFeatureReader(BaseReader):
    """Reads TFRecords of SequenceExamples.

    The TFRecords must contain SequenceExamples with a string 'id' and a
    sparse int64 'labels' context feature, plus one byte-string sequence
    feature for every name in 'feature_names'.

    NOTE: the features are only decoded from bytes and cast to float32. They
    are not dequantized into [min_quantized_value, max_quantized_value] and
    the returned matrices are not padded or truncated to max_frames.
    """

    def __init__(self,
                 num_classes=3862,
                 feature_sizes=[1024],
                 feature_names=["inc3"],
                 max_frames=300):
        """Construct a YT8MFrameFeatureReader.

        Args:
          num_classes: a positive integer for the number of classes.
          feature_sizes: positive integer(s) for the feature dimensions as a
            list.
          feature_names: the feature name(s) in the tensorflow record as a
            list.
          max_frames: the maximum number of frames to report.

        Raises:
          AssertionError: if feature_names and feature_sizes differ in length.
        """
        assert len(feature_names) == len(feature_sizes), (
            "length of feature_names (={}) != length of feature_sizes (={})"
            .format(len(feature_names), len(feature_sizes)))

        self.num_classes = num_classes
        # Store copies so the shared default lists (and caller-owned lists)
        # cannot be mutated through the reader.
        self.feature_sizes = list(feature_sizes)
        self.feature_names = list(feature_names)
        self.max_frames = max_frames

    def get_video_matrix(self, features, feature_size, max_frames,
                         max_quantized_value, min_quantized_value):
        """Decodes byte-string frame features into a float32 matrix.

        Args:
          features: raw feature values
          feature_size: length of each frame feature vector
          max_frames: upper bound applied to the returned frame count
          max_quantized_value: unused; kept for interface compatibility.
          min_quantized_value: unused; kept for interface compatibility.

        Returns:
          feature_matrix: matrix of all frame-features with shape
            [-1, feature_size]; it is neither dequantized nor resized.
          num_frames: number of decoded frames, capped at max_frames
        """
        decoded_features = tf.reshape(
            tf.cast(tf.decode_raw(features, tf.uint8), tf.float32),
            [-1, feature_size])

        num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames)
        return decoded_features, num_frames

    def prepare_reader(self,
                       filename_queue,
                       max_quantized_value=2,
                       min_quantized_value=-2):
        """Creates a single reader thread for YouTube8M SequenceExamples.

        Args:
          filename_queue: A tensorflow queue of filename locations.
          max_quantized_value: the maximum of the quantized value (forwarded
            to get_video_matrix).
          min_quantized_value: the minimum of the quantized value (forwarded
            to get_video_matrix).

        Returns:
          A tuple (video id, matrix of the first feature, matrix of the
          second feature, dense boolean label vector of length num_classes,
          number of frames of the first feature capped at max_frames).

        Raises:
          AssertionError: if fewer than two features are configured or if
            feature_names and feature_sizes differ in length.
        """
        num_features = len(self.feature_names)
        assert num_features > 0, "No feature selected: feature_names is empty!"
        assert len(self.feature_names) == len(self.feature_sizes), (
            "length of feature_names (={}) != length of feature_sizes (={})"
            .format(len(self.feature_names), len(self.feature_sizes)))
        # Both the first and the second feature matrix are returned below, so
        # fail with a clear message instead of an IndexError on a single one.
        assert num_features >= 2, (
            "prepare_reader returns a video and an audio matrix and needs at "
            "least two features, got {}".format(num_features))

        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        contexts, features = tf.parse_single_sequence_example(
            serialized_example,
            context_features={
                "id": tf.FixedLenFeature([], tf.string),
                "labels": tf.VarLenFeature(tf.int64)
            },
            sequence_features={
                feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
                for feature_name in self.feature_names
            })

        # read ground truth labels
        labels = (tf.cast(
            tf.sparse_to_dense(
                contexts["labels"].values, (self.num_classes, ),
                1,
                validate_indices=False),
            tf.bool))

        # load each (potentially different) type of feature
        num_frames = None  # the number of frames in the video
        feature_matrices = []
        for feature_name, feature_size in zip(self.feature_names,
                                              self.feature_sizes):
            feature_matrix, num_frames_in_this_feature = \
                self.get_video_matrix(features[feature_name], feature_size,
                                      self.max_frames, max_quantized_value,
                                      min_quantized_value)
            # The frame count is taken from the first feature only. An
            # identity check avoids comparing a tensor with an integer.
            if num_frames is None:
                num_frames = num_frames_in_this_feature
            feature_matrices.append(feature_matrix)

        # cap the number of frames at self.max_frames
        num_frames = tf.minimum(num_frames, self.max_frames)

        # the first two features are returned separately (not concatenated)
        video_matrix = feature_matrices[0]
        audio_matrix = feature_matrices[1]

        return contexts["id"], video_matrix, audio_matrix, labels, num_frames
def main(files_pattern):
    """Convert the TFRecord file(s) matching ``files_pattern`` to one pickle.

    Every example is read once, in order, and stored as a dict with the keys
    'video' (id), 'feature' and 'audio' (uint8 arrays), 'label' (list of
    positive class indices) and 'nframes'. The list of dicts is written to
    ``<target_dir>/<name>.pkl`` where ``<name>`` is the last path component
    of ``files_pattern`` up to its first '.'.

    Args:
        files_pattern: path or glob pattern of the TFRecord file(s).
    """
    all_data = []

    # Build every conversion in a fresh graph. This function is called once
    # per record file; in the shared default graph the ops and queue runners
    # of earlier calls would pile up and be started again by
    # start_queue_runners.
    with tf.Graph().as_default():
        data_files = gfile.Glob(files_pattern)
        filename_queue = tf.train.string_input_producer(
            data_files, num_epochs=1, shuffle=False)

        reader = YT8MFrameFeatureReader(
            feature_sizes=[1024, 128], feature_names=["rgb", "audio"])
        vals = reader.prepare_reader(filename_queue)

        with tf.Session() as sess:
            sess.run(tf.initialize_local_variables())
            sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                while not coord.should_stop():
                    vid, features, audios, labels, nframes = sess.run(vals)
                    # labels is a boolean vector; keep the set indices only.
                    label_index = np.where(labels)[0].tolist()

                    all_data.append({
                        'video': vid,
                        'feature': features.astype(np.uint8),
                        'audio': audios.astype(np.uint8),
                        'label': label_index,
                        'nframes': nframes,
                    })
            except tf.errors.OutOfRangeError:
                # Raised once the single epoch over the input is exhausted.
                print('Finished extracting.')
            finally:
                coord.request_stop()
                coord.join(threads)

    # Number of extracted videos.
    print(len(all_data))

    record_name = files_pattern.split('/')[-1].split('.')[0]
    output_path = os.path.join(target_dir, '%s.pkl' % record_name)
    with open(output_path, 'wb') as outp:
        cPickle.dump(all_data, outp, protocol=cPickle.HIGHEST_PROTOCOL)
if __name__ == '__main__':
    # Convert every entry of the source directory, one pickle per record file.
    for record_file in os.listdir(source_dir):
        main(os.path.join(source_dir, record_file))
fluid/PaddleCV/video/dataset/youtube8m/yt8m_pca/eigenvals.npy
0 → 100644
浏览文件 @
dbc24a3b
文件已添加
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录