# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # #Licensed under the Apache License, Version 2.0 (the "License"); #you may not use this file except in compliance with the License. #You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # #Unless required by applicable law or agreed to in writing, software #distributed under the License is distributed on an "AS IS" BASIS, #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #See the License for the specific language governing permissions and #limitations under the License. """Provides readers configured for different datasets.""" import os, sys import numpy as np import tensorflow as tf from tensorflow import logging import cPickle from tensorflow.python.platform import gfile assert (len(sys.argv) == 3) source_dir = sys.argv[1] target_dir = sys.argv[2] def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2): """Dequantize the feature from the byte format to the float format. Args: feat_vector: the input 1-d vector. max_quantized_value: the maximum of the quantized value. min_quantized_value: the minimum of the quantized value. Returns: A float vector which has the same shape as feat_vector. """ assert max_quantized_value > min_quantized_value quantized_range = max_quantized_value - min_quantized_value scalar = quantized_range / 255.0 bias = (quantized_range / 512.0) + min_quantized_value return feat_vector * scalar + bias def resize_axis(tensor, axis, new_size, fill_value=0): """Truncates or pads a tensor to new_size on on a given axis. Truncate or extend tensor such that tensor.shape[axis] == new_size. If the size increases, the padding will be performed at the end, using fill_value. Args: tensor: The tensor to be resized. axis: An integer representing the dimension to be sliced. new_size: An integer or 0d tensor representing the new value for tensor.shape[axis]. fill_value: Value to use to fill any new entries in the tensor. Will be cast to the type of tensor. Returns: The resized tensor. """ tensor = tf.convert_to_tensor(tensor) shape = tf.unstack(tf.shape(tensor)) pad_shape = shape[:] pad_shape[axis] = tf.maximum(0, new_size - shape[axis]) shape[axis] = tf.minimum(shape[axis], new_size) shape = tf.stack(shape) resized = tf.concat([ tf.slice(tensor, tf.zeros_like(shape), shape), tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype)) ], axis) # Update shape. new_shape = tensor.get_shape().as_list() # A copy is being made. new_shape[axis] = new_size resized.set_shape(new_shape) return resized class BaseReader(object): """Inherit from this class when implementing new readers.""" def prepare_reader(self, unused_filename_queue): """Create a thread for generating prediction and label tensors.""" raise NotImplementedError() class YT8MFrameFeatureReader(BaseReader): """Reads TFRecords of SequenceExamples. The TFRecords must contain SequenceExamples with the sparse in64 'labels' context feature and a fixed length byte-quantized feature vector, obtained from the features in 'feature_names'. The quantized features will be mapped back into a range between min_quantized_value and max_quantized_value. """ def __init__(self, num_classes=3862, feature_sizes=[1024], feature_names=["inc3"], max_frames=300): """Construct a YT8MFrameFeatureReader. Args: num_classes: a positive integer for the number of classes. feature_sizes: positive integer(s) for the feature dimensions as a list. feature_names: the feature name(s) in the tensorflow record as a list. max_frames: the maximum number of frames to process. """ assert len(feature_names) == len(feature_sizes), \ "length of feature_names (={}) != length of feature_sizes (={})".format( \ len(feature_names), len(feature_sizes)) self.num_classes = num_classes self.feature_sizes = feature_sizes self.feature_names = feature_names self.max_frames = max_frames def get_video_matrix(self, features, feature_size, max_frames, max_quantized_value, min_quantized_value): """Decodes features from an input string and quantizes it. Args: features: raw feature values feature_size: length of each frame feature vector max_frames: number of frames (rows) in the output feature_matrix max_quantized_value: the maximum of the quantized value. min_quantized_value: the minimum of the quantized value. Returns: feature_matrix: matrix of all frame-features num_frames: number of frames in the sequence """ decoded_features = tf.reshape( tf.cast(tf.decode_raw(features, tf.uint8), tf.float32), [-1, feature_size]) num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames) feature_matrix = decoded_features return feature_matrix, num_frames def prepare_reader(self, filename_queue, max_quantized_value=2, min_quantized_value=-2): """Creates a single reader thread for YouTube8M SequenceExamples. Args: filename_queue: A tensorflow queue of filename locations. max_quantized_value: the maximum of the quantized value. min_quantized_value: the minimum of the quantized value. Returns: A tuple of video indexes, video features, labels, and padding data. """ reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) contexts, features = tf.parse_single_sequence_example( serialized_example, context_features={ "id": tf.FixedLenFeature([], tf.string), "labels": tf.VarLenFeature(tf.int64) }, sequence_features={ feature_name: tf.FixedLenSequenceFeature( [], dtype=tf.string) for feature_name in self.feature_names }) # read ground truth labels labels = (tf.cast( tf.sparse_to_dense( contexts["labels"].values, (self.num_classes, ), 1, validate_indices=False), tf.bool)) # loads (potentially) different types of features and concatenates them num_features = len(self.feature_names) assert num_features > 0, "No feature selected: feature_names is empty!" assert len(self.feature_names) == len(self.feature_sizes), \ "length of feature_names (={}) != length of feature_sizes (={})".format( \ len(self.feature_names), len(self.feature_sizes)) num_frames = -1 # the number of frames in the video feature_matrices = [None ] * num_features # an array of different features for feature_index in range(num_features): feature_matrix, num_frames_in_this_feature = self.get_video_matrix( features[self.feature_names[feature_index]], self.feature_sizes[feature_index], self.max_frames, max_quantized_value, min_quantized_value) if num_frames == -1: num_frames = num_frames_in_this_feature #else: # tf.assert_equal(num_frames, num_frames_in_this_feature) feature_matrices[feature_index] = feature_matrix # cap the number of frames at self.max_frames num_frames = tf.minimum(num_frames, self.max_frames) # concatenate different features video_matrix = feature_matrices[0] audio_matrix = feature_matrices[1] return contexts["id"], video_matrix, audio_matrix, labels, num_frames def main(files_pattern): data_files = gfile.Glob(files_pattern) filename_queue = tf.train.string_input_producer( data_files, num_epochs=1, shuffle=False) reader = YT8MFrameFeatureReader( feature_sizes=[1024, 128], feature_names=["rgb", "audio"]) vals = reader.prepare_reader(filename_queue) with tf.Session() as sess: sess.run(tf.initialize_local_variables()) sess.run(tf.initialize_all_variables()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) vid_num = 0 all_data = [] try: while not coord.should_stop(): vid, features, audios, labels, nframes = sess.run(vals) label_index = np.where(labels == True)[0].tolist() vid_num += 1 #print vid, features.shape, audios.shape, label_index, nframes features_int = features.astype(np.uint8) audios_int = audios.astype(np.uint8) value_dict = {} value_dict['video'] = vid value_dict['feature'] = features_int value_dict['audio'] = audios_int value_dict['label'] = label_index value_dict['nframes'] = nframes all_data.append(value_dict) except tf.errors.OutOfRangeError: print('Finished extracting.') finally: coord.request_stop() coord.join(threads) print vid_num record_name = files_pattern.split('/')[-1].split('.')[0] outputdir = target_dir fn = '%s.pkl' % record_name outp = open(os.path.join(outputdir, fn), 'wb') cPickle.dump(all_data, outp, protocol=cPickle.HIGHEST_PROTOCOL) outp.close() if __name__ == '__main__': record_dir = source_dir record_files = os.listdir(record_dir) for f in record_files: record_path = os.path.join(record_dir, f) main(record_path)