Unverified commit c1c5ed64 authored by qingqing01, committed by GitHub

Revert pr1732 (#1741)

* Delete file
* Revert the raw file
Parent 4a8bea03
[submodule "PaddleNLP/LAC"] [submodule "fluid/PaddleNLP/LAC"]
path = PaddleNLP/LAC path = fluid/PaddleNLP/LAC
url = https://github.com/baidu/lac.git url = https://github.com/baidu/lac.git
[submodule "PaddleNLP/SimNet"] [submodule "fluid/PaddleNLP/SimNet"]
path = PaddleNLP/SimNet path = fluid/PaddleNLP/SimNet
url = https://github.com/baidu/AnyQ.git url = https://github.com/baidu/AnyQ.git
[submodule "PaddleNLP/Senta"] [submodule "fluid/PaddleNLP/Senta"]
path = PaddleNLP/Senta path = fluid/PaddleNLP/Senta
url = https://github.com/baidu/Senta.git url = https://github.com/baidu/Senta.git
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import operator
import numpy as np
import paddle.fluid as fluid
from absl import flags
FLAGS = flags.FLAGS
flags.DEFINE_float("bn_decay", 0.9, "batch norm decay")
flags.DEFINE_float("dropout_rate", 0.5, "dropout rate")
def calc_padding(img_width, stride, dilation, filter_width):
""" calculate pixels to padding in order to keep input/output size same. """
filter_width = dilation * (filter_width - 1) + 1
if img_width % stride == 0:
pad_along_width = max(filter_width - stride, 0)
else:
pad_along_width = max(filter_width - (img_width % stride), 0)
return pad_along_width // 2, pad_along_width - pad_along_width // 2
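# Quick illustrative checks (an editorial sketch, not part of the original
# file): calc_padding reproduces TensorFlow-style "SAME" padding.
assert calc_padding(32, 1, 1, 3) == (1, 1)  # 3x3 kernel, stride 1: 1 px per side
assert calc_padding(32, 2, 1, 3) == (0, 1)  # odd leftover pixel goes to the far side
assert calc_padding(32, 1, 2, 3) == (2, 2)  # dilation 2 widens the effective kernel to 5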
def conv(inputs,
filters,
kernel,
strides=(1, 1),
dilation=(1, 1),
num_groups=1,
conv_param=None):
""" normal conv layer """
if isinstance(kernel, (tuple, list)):
n = operator.mul(*kernel) * inputs.shape[1]
else:
n = kernel * kernel * inputs.shape[1]
# pad input
padding = (0, 0, 0, 0) \
+ calc_padding(inputs.shape[2], strides[0], dilation[0], kernel[0]) \
+ calc_padding(inputs.shape[3], strides[1], dilation[1], kernel[1])
if sum(padding) > 0:
inputs = fluid.layers.pad(inputs, padding, 0)
param_attr = fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
0.0, scale=np.sqrt(2.0 / n)),
regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay))
bias_attr = fluid.param_attr.ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.))
return fluid.layers.conv2d(
inputs,
filters,
kernel,
stride=strides,
padding=0,
dilation=dilation,
groups=num_groups,
param_attr=param_attr if conv_param is None else conv_param,
use_cudnn=False if num_groups == inputs.shape[1] == filters else True,
bias_attr=bias_attr,
act=None)
def sep(inputs, filters, kernel, strides=(1, 1), dilation=(1, 1)):
""" Separable convolution layer """
if isinstance(kernel, (tuple, list)):
n_depth = operator.mul(*kernel)
else:
n_depth = kernel * kernel
n_point = inputs.shape[1]
if isinstance(strides, (tuple, list)):
multiplier = strides[0]
else:
multiplier = strides
depthwise_param = fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
0.0, scale=np.sqrt(2.0 / n_depth)),
regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay))
pointwise_param = fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
0.0, scale=np.sqrt(2.0 / n_point)),
regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay))
depthwise_conv = conv(
inputs=inputs,
kernel=kernel,
filters=int(filters * multiplier),
strides=strides,
dilation=dilation,
num_groups=int(filters * multiplier),
conv_param=depthwise_param)
return conv(
inputs=depthwise_conv,
kernel=(1, 1),
filters=int(filters * multiplier),
strides=(1, 1),
dilation=dilation,
conv_param=pointwise_param)
def maxpool(inputs, kernel, strides=(1, 1)):
padding = (0, 0, 0, 0) \
+ calc_padding(inputs.shape[2], strides[0], 1, kernel[0]) \
+ calc_padding(inputs.shape[3], strides[1], 1, kernel[1])
if sum(padding) > 0:
inputs = fluid.layers.pad(inputs, padding, 0)
return fluid.layers.pool2d(
inputs, kernel, 'max', strides, pool_padding=0, ceil_mode=False)
def avgpool(inputs, kernel, strides=(1, 1)):
padding_pixel = (0, 0, 0, 0)
padding_pixel += calc_padding(inputs.shape[2], strides[0], 1, kernel[0])
padding_pixel += calc_padding(inputs.shape[3], strides[1], 1, kernel[1])
    if padding_pixel[4] == padding_pixel[5] and \
            padding_pixel[6] == padding_pixel[7]:
# same padding pixel num on all sides.
return fluid.layers.pool2d(
inputs,
kernel,
'avg',
strides,
pool_padding=(padding_pixel[4], padding_pixel[6]),
ceil_mode=False)
elif padding_pixel[4] + 1 == padding_pixel[5] and padding_pixel[6] + 1 == padding_pixel[7] \
and strides == (1, 1):
# different padding size: first pad then crop.
x = fluid.layers.pool2d(
inputs,
kernel,
'avg',
strides,
pool_padding=(padding_pixel[5], padding_pixel[7]),
ceil_mode=False)
x_shape = x.shape
return fluid.layers.crop(
x,
shape=(-1, x_shape[1], x_shape[2] - 1, x_shape[3] - 1),
offsets=(0, 0, 1, 1))
    else:
        # not supported: fall back to zero padding plus pool2d.
        print("Warning: using zero-padding in avgpool")
outputs = fluid.layers.pad(inputs, padding_pixel, 0)
return fluid.layers.pool2d(
outputs, kernel, 'avg', strides, pool_padding=0, ceil_mode=False)
def global_avgpool(inputs):
return fluid.layers.pool2d(
inputs,
1,
'avg',
1,
pool_padding=0,
global_pooling=True,
ceil_mode=True)
def fully_connected(inputs, units):
n = inputs.shape[1]
param_attr = fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
0.0, scale=np.sqrt(2.0 / n)),
regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay))
bias_attr = fluid.param_attr.ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.))
return fluid.layers.fc(inputs,
units,
param_attr=param_attr,
bias_attr=bias_attr)
def bn_relu(inputs):
""" batch norm + rely layer """
output = fluid.layers.batch_norm(
inputs, momentum=FLAGS.bn_decay, epsilon=0.001, data_layout="NCHW")
return fluid.layers.relu(output)
def dropout(inputs):
""" dropout layer """
return fluid.layers.dropout(inputs, dropout_prob=FLAGS.dropout_rate)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import build.layers as layers
def conv_1x1(inputs, downsample=False):
return conv_base(inputs, (1, 1), downsample=downsample)
def conv_2x2(inputs, downsample=False):
return conv_base(inputs, (2, 2), downsample=downsample)
def conv_3x3(inputs, downsample=False):
return conv_base(inputs, (3, 3), downsample=downsample)
def dilated_2x2(inputs, downsample=False):
return conv_base(inputs, (2, 2), (2, 2), downsample)
def conv_1x2_2x1(inputs, downsample=False):
return pair_base(inputs, 2, downsample)
def conv_1x3_3x1(inputs, downsample=False):
return pair_base(inputs, 3, downsample)
def sep_2x2(inputs, downsample=False):
return sep_base(inputs, (2, 2), downsample=downsample)
def sep_3x3(inputs, downsample=False):
return sep_base(inputs, (3, 3), downsample=downsample)
def maxpool_2x2(inputs, downsample=False):
return maxpool_base(inputs, (2, 2), downsample)
def maxpool_3x3(inputs, downsample=False):
return maxpool_base(inputs, (3, 3), downsample)
def avgpool_2x2(inputs, downsample=False):
return avgpool_base(inputs, (2, 2), downsample)
def avgpool_3x3(inputs, downsample=False):
return avgpool_base(inputs, (3, 3), downsample)
def conv_base(inputs, kernel, dilation=(1, 1), downsample=False):
filters = inputs.shape[1]
if downsample:
output = layers.conv(inputs, filters * 2, kernel, (2, 2))
else:
output = layers.conv(inputs, filters, kernel, dilation=dilation)
return output
def pair_base(inputs, kernel, downsample=False):
filters = inputs.shape[1]
if downsample:
output = layers.conv(inputs, filters, (1, kernel), (1, 2))
output = layers.conv(output, filters, (kernel, 1), (2, 1))
output = layers.conv(output, filters * 2, (1, 1))
else:
output = layers.conv(inputs, filters, (1, kernel))
output = layers.conv(output, filters, (kernel, 1))
return output
def sep_base(inputs, kernel, dilation=(1, 1), downsample=False):
filters = inputs.shape[1]
if downsample:
output = layers.sep(inputs, filters * 2, kernel, (2, 2))
else:
output = layers.sep(inputs, filters, kernel, dilation=dilation)
return output
def maxpool_base(inputs, kernel, downsample=False):
if downsample:
filters = inputs.shape[1]
output = layers.maxpool(inputs, kernel, (2, 2))
output = layers.conv(output, filters * 2, (1, 1))
else:
output = layers.maxpool(inputs, kernel)
return output
def avgpool_base(inputs, kernel, downsample=False):
if downsample:
filters = inputs.shape[1]
output = layers.avgpool(inputs, kernel, (2, 2))
output = layers.conv(output, filters * 2, (1, 1))
else:
output = layers.avgpool(inputs, kernel)
return output
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from absl import flags
import build.layers as layers
import build.ops as _ops
FLAGS = flags.FLAGS
flags.DEFINE_integer("num_stages", 3, "number of stages")
flags.DEFINE_integer("num_blocks", 5, "number of blocks per stage")
flags.DEFINE_integer("num_ops", 2, "number of operations per block")
flags.DEFINE_integer("width", 64, "network width")
flags.DEFINE_string("downsample", "pool", "conv or pool")
num_classes = 10
ops = [
_ops.conv_1x1,
_ops.conv_2x2,
_ops.conv_3x3,
_ops.dilated_2x2,
_ops.conv_1x2_2x1,
_ops.conv_1x3_3x1,
_ops.sep_2x2,
_ops.sep_3x3,
_ops.maxpool_2x2,
_ops.maxpool_3x3,
_ops.avgpool_2x2,
_ops.avgpool_3x3,
]
def net(inputs, tokens):
""" build network with skip links """
x = layers.conv(inputs, FLAGS.width, (3, 3))
num_ops = FLAGS.num_blocks * FLAGS.num_ops
x = stage(x, tokens[:num_ops], pre_activation=True)
for i in range(1, FLAGS.num_stages):
x = stage(x, tokens[i * num_ops:(i + 1) * num_ops], downsample=True)
x = layers.bn_relu(x)
x = layers.global_avgpool(x)
x = layers.dropout(x)
logits = layers.fully_connected(x, num_classes)
return fluid.layers.softmax(logits)
def stage(x, tokens, pre_activation=False, downsample=False):
""" build network's stage. Stage consists of blocks """
x = block(x, tokens[:FLAGS.num_ops], pre_activation, downsample)
for i in range(1, FLAGS.num_blocks):
print("-" * 12)
x = block(x, tokens[i * FLAGS.num_ops:(i + 1) * FLAGS.num_ops])
print("=" * 12)
return x
def block(x, tokens, pre_activation=False, downsample=False):
""" build block. """
if pre_activation:
x = layers.bn_relu(x)
res = x
else:
res = x
x = layers.bn_relu(x)
x = ops[tokens[0]](x, downsample)
print("%s \t-> shape %s" % (ops[0].__name__, x.shape))
for token in tokens[1:]:
x = layers.bn_relu(x)
x = ops[token](x)
print("%s \t-> shape %s" % (ops[token].__name__, x.shape))
if downsample:
filters = res.shape[1]
if FLAGS.downsample == "conv":
res = layers.conv(res, filters * 2, (1, 1), (2, 2))
elif FLAGS.downsample == "pool":
res = layers.avgpool(res, (2, 2), (2, 2))
res = fluid.layers.pad(res, (0, 0, filters // 2, filters // 2, 0, 0,
0, 0))
else:
raise NotImplementedError
return x + res
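# Usage sketch (illustrative, not part of the original file): with the default
# flags (num_stages=3, num_blocks=5, num_ops=2), net() consumes
# 3 * 5 * 2 = 30 tokens, each an index into the 12-entry `ops` table above.
example_tokens = [2, 7] * 15  # alternate conv_3x3 and sep_3x3 in every block
assert [ops[t].__name__ for t in example_tokens[:2]] == ['conv_3x3', 'sep_3x3']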
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from absl import flags
import build.layers as layers
import build.ops as _ops
FLAGS = flags.FLAGS
flags.DEFINE_integer("num_stages", 5, "number of stages")
flags.DEFINE_integer("width", 64, "network width")
num_classes = 10
ops = [
_ops.conv_1x1, #0
_ops.conv_2x2, #1
_ops.conv_3x3, #2
_ops.dilated_2x2, #3
_ops.conv_1x2_2x1, #4
_ops.conv_1x3_3x1, #5
_ops.sep_2x2, #6
_ops.sep_3x3, #7
_ops.maxpool_2x2, #8
    _ops.maxpool_3x3, #9
_ops.avgpool_2x2, #10
    _ops.avgpool_3x3, #11
]
def net(inputs, tokens):
depth = len(tokens)
    # distribute num_stages - 1 downsampling points so that the ops are
    # split into stages of roughly equal depth
    q, r = divmod(depth + 1, FLAGS.num_stages)
    downsample_steps = [
        i * q + max(0, i + r - FLAGS.num_stages + 1) - 2
        for i in range(1, FLAGS.num_stages)
    ]
x = layers.conv(inputs, FLAGS.width, (3, 3))
x = layers.bn_relu(x)
for i, token in enumerate(tokens):
downsample = i in downsample_steps
x = ops[token](x, downsample)
print("%s \t-> shape %s" % (ops[token].__name__, x.shape))
if downsample:
print("=" * 12)
x = layers.bn_relu(x)
x = layers.global_avgpool(x)
x = layers.dropout(x)
logits = layers.fully_connected(x, num_classes)
return fluid.layers.softmax(logits)
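# Worked example (illustrative, not part of the original file) of the
# downsample schedule in net() above: for 20 tokens and num_stages=5,
# divmod(21, 5) gives q=4, r=1, so downsampling happens after ops 2, 6, 10
# and 15, splitting the 20 ops into stages of roughly equal depth.
_q, _r = divmod(20 + 1, 5)
assert [i * _q + max(0, i + _r - 5 + 1) - 2 for i in range(1, 5)] == [2, 6, 10, 15]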
1. Download kinetics-400_train.csv and kinetics-400_val.csv.
2. ffmpeg is required to decode the mp4 videos.
3. Convert each mp4 video to a pkl file; each pkl stores [video_id, label, images].
python generate_label.py kinetics-400_train.csv kinetics400_label.txt  # generate the label file
python video2pkl.py kinetics-400_train.csv $Source_dir $Target_dir $NUM_THREADS
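For reference, a generated pkl can be read back as follows (a minimal sketch, assuming Python 2 to match video2pkl.py below; the file name is illustrative):

import cPickle
with open('example_video.pkl', 'rb') as f:
    video_id, label, images = cPickle.load(f)  # images: list of raw JPEG strings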
import sys
# kinetics-400_train.csv should be downloaded first and set as sys.argv[1]
# sys.argv[2] can be set as kinetics400_label.txt
# python generate_label.py kinetics-400_train.csv kinetics400_label.txt
num_classes = 400
fname = sys.argv[1]
outname = sys.argv[2]
fl = open(fname).readlines()
fl = fl[1:]  # skip the csv header row
outf = open(outname, 'w')
label_list = []
for line in fl:
label = line.strip().split(',')[0].strip('"')
if label in label_list:
continue
else:
label_list.append(label)
assert len(label_list) == num_classes, \
    "there should be {} labels in the list, but got {}".format(
        num_classes, len(label_list))
label_list.sort()
for i in range(num_classes):
outf.write('{} {}'.format(label_list[i], i) + '\n')
outf.close()
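Each line of the resulting kinetics400_label.txt is a "<label> <index>" pair, with labels sorted alphabetically; for example, the first two lines should read "abseiling 0" and "air drumming 1".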
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import glob
import cPickle
from multiprocessing import Pool
# example command line: python generate_k400_pkl.py kinetics-400_train.csv 8
#
# kinetics-400_train.csv is the training set file of K400 official release
# each line contains label,youtube_id,time_start,time_end,split,is_cc
assert (len(sys.argv) == 5)
f = open(sys.argv[1])
source_dir = sys.argv[2]
target_dir = sys.argv[3]
num_threads = sys.argv[4]
all_video_entries = [x.strip().split(',') for x in f.readlines()]
all_video_entries = all_video_entries[1:]
f.close()
category_label_map = {}
f = open('kinetics400_label.txt')
for line in f:
ens = line.strip().split(' ')
category = " ".join(ens[0:-1])
label = int(ens[-1])
category_label_map[category] = label
f.close()
def generate_pkl(entry):
mode = entry[4]
category = entry[0].strip('"')
category_dir = category
video_path = os.path.join(
'./',
entry[1] + "_%06d" % int(entry[2]) + "_%06d" % int(entry[3]) + ".mp4")
video_path = os.path.join(source_dir, category_dir, video_path)
label = category_label_map[category]
vid = './' + video_path.split('/')[-1].split('.')[0]
if os.path.exists(video_path):
if not os.path.exists(vid):
os.makedirs(vid)
os.system('ffmpeg -i ' + video_path + ' -q 0 ' + vid + '/%06d.jpg')
    else:
        print("File does not exist: {}".format(video_path))
        return
images = sorted(glob.glob(vid + '/*.jpg'))
ims = []
    for img in images:
        f = open(img, 'rb')
        ims.append(f.read())
        f.close()
output_pkl = vid + ".pkl"
output_pkl = os.path.join(target_dir, output_pkl)
    f = open(output_pkl, 'wb')
cPickle.dump((vid, label, ims), f, -1)
f.close()
os.system('rm -rf %s' % vid)
pool = Pool(processes=int(num_threads))
pool.map(generate_pkl, all_video_entries)
pool.close()
pool.join()
1. TensorFlow is required to process the tfrecords.
2. python tf2pkl.py $Source_dir $Target_dir
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides readers configured for different datasets."""
import os, sys
import numpy as np
import tensorflow as tf
from tensorflow import logging
import cPickle
from tensorflow.python.platform import gfile
assert (len(sys.argv) == 3)
source_dir = sys.argv[1]
target_dir = sys.argv[2]
def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
"""Dequantize the feature from the byte format to the float format.
Args:
feat_vector: the input 1-d vector.
max_quantized_value: the maximum of the quantized value.
min_quantized_value: the minimum of the quantized value.
Returns:
A float vector which has the same shape as feat_vector.
"""
assert max_quantized_value > min_quantized_value
quantized_range = max_quantized_value - min_quantized_value
scalar = quantized_range / 255.0
bias = (quantized_range / 512.0) + min_quantized_value
return feat_vector * scalar + bias
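# Worked example (illustrative, not part of the original file): with the
# defaults, a byte value b maps to b * (4 / 255.) + (4 / 512. - 2), so the
# half-bin bias centers each quantization bucket:
# Dequantize(0.) ~= -1.992, Dequantize(128.) ~= 0.016, Dequantize(255.) ~= 2.008
assert abs(Dequantize(255.) - 2.0078125) < 1e-9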
def resize_axis(tensor, axis, new_size, fill_value=0):
"""Truncates or pads a tensor to new_size on on a given axis.
Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
size increases, the padding will be performed at the end, using fill_value.
Args:
tensor: The tensor to be resized.
axis: An integer representing the dimension to be sliced.
new_size: An integer or 0d tensor representing the new value for
tensor.shape[axis].
fill_value: Value to use to fill any new entries in the tensor. Will be
cast to the type of tensor.
Returns:
The resized tensor.
"""
tensor = tf.convert_to_tensor(tensor)
shape = tf.unstack(tf.shape(tensor))
pad_shape = shape[:]
pad_shape[axis] = tf.maximum(0, new_size - shape[axis])
shape[axis] = tf.minimum(shape[axis], new_size)
shape = tf.stack(shape)
resized = tf.concat([
tf.slice(tensor, tf.zeros_like(shape), shape),
tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
], axis)
# Update shape.
new_shape = tensor.get_shape().as_list() # A copy is being made.
new_shape[axis] = new_size
resized.set_shape(new_shape)
return resized
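# Usage sketch (illustrative, assumes a TensorFlow 1.x session as used below):
#   t = tf.constant([[1., 2.], [3., 4.]])
#   sess.run(resize_axis(t, axis=0, new_size=3))
#   # -> [[1., 2.], [3., 4.], [0., 0.]]  (padded at the end with fill_value)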
class BaseReader(object):
"""Inherit from this class when implementing new readers."""
def prepare_reader(self, unused_filename_queue):
"""Create a thread for generating prediction and label tensors."""
raise NotImplementedError()
class YT8MFrameFeatureReader(BaseReader):
"""Reads TFRecords of SequenceExamples.
    The TFRecords must contain SequenceExamples with the sparse int64 'labels'
context feature and a fixed length byte-quantized feature vector, obtained
from the features in 'feature_names'. The quantized features will be mapped
back into a range between min_quantized_value and max_quantized_value.
"""
def __init__(self,
num_classes=3862,
feature_sizes=[1024],
feature_names=["inc3"],
max_frames=300):
"""Construct a YT8MFrameFeatureReader.
Args:
num_classes: a positive integer for the number of classes.
feature_sizes: positive integer(s) for the feature dimensions as a list.
feature_names: the feature name(s) in the tensorflow record as a list.
max_frames: the maximum number of frames to process.
"""
assert len(feature_names) == len(feature_sizes), \
"length of feature_names (={}) != length of feature_sizes (={})".format( \
len(feature_names), len(feature_sizes))
self.num_classes = num_classes
self.feature_sizes = feature_sizes
self.feature_names = feature_names
self.max_frames = max_frames
def get_video_matrix(self, features, feature_size, max_frames,
max_quantized_value, min_quantized_value):
"""Decodes features from an input string and quantizes it.
Args:
features: raw feature values
feature_size: length of each frame feature vector
max_frames: number of frames (rows) in the output feature_matrix
max_quantized_value: the maximum of the quantized value.
min_quantized_value: the minimum of the quantized value.
Returns:
feature_matrix: matrix of all frame-features
num_frames: number of frames in the sequence
"""
decoded_features = tf.reshape(
tf.cast(tf.decode_raw(features, tf.uint8), tf.float32),
[-1, feature_size])
num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames)
feature_matrix = decoded_features
return feature_matrix, num_frames
def prepare_reader(self,
filename_queue,
max_quantized_value=2,
min_quantized_value=-2):
"""Creates a single reader thread for YouTube8M SequenceExamples.
Args:
filename_queue: A tensorflow queue of filename locations.
max_quantized_value: the maximum of the quantized value.
min_quantized_value: the minimum of the quantized value.
Returns:
A tuple of video indexes, video features, labels, and padding data.
"""
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
contexts, features = tf.parse_single_sequence_example(
serialized_example,
context_features={
"id": tf.FixedLenFeature([], tf.string),
"labels": tf.VarLenFeature(tf.int64)
},
sequence_features={
feature_name: tf.FixedLenSequenceFeature(
[], dtype=tf.string)
for feature_name in self.feature_names
})
# read ground truth labels
labels = (tf.cast(
tf.sparse_to_dense(
contexts["labels"].values, (self.num_classes, ),
1,
validate_indices=False),
tf.bool))
# loads (potentially) different types of features and concatenates them
num_features = len(self.feature_names)
assert num_features > 0, "No feature selected: feature_names is empty!"
assert len(self.feature_names) == len(self.feature_sizes), \
"length of feature_names (={}) != length of feature_sizes (={})".format( \
len(self.feature_names), len(self.feature_sizes))
num_frames = -1 # the number of frames in the video
        feature_matrices = [None] * num_features  # an array of different features
for feature_index in range(num_features):
feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
features[self.feature_names[feature_index]],
self.feature_sizes[feature_index], self.max_frames,
max_quantized_value, min_quantized_value)
if num_frames == -1:
num_frames = num_frames_in_this_feature
#else:
# tf.assert_equal(num_frames, num_frames_in_this_feature)
feature_matrices[feature_index] = feature_matrix
# cap the number of frames at self.max_frames
num_frames = tf.minimum(num_frames, self.max_frames)
# concatenate different features
video_matrix = feature_matrices[0]
audio_matrix = feature_matrices[1]
return contexts["id"], video_matrix, audio_matrix, labels, num_frames
def main(files_pattern):
data_files = gfile.Glob(files_pattern)
filename_queue = tf.train.string_input_producer(
data_files, num_epochs=1, shuffle=False)
reader = YT8MFrameFeatureReader(
feature_sizes=[1024, 128], feature_names=["rgb", "audio"])
vals = reader.prepare_reader(filename_queue)
with tf.Session() as sess:
sess.run(tf.initialize_local_variables())
sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
vid_num = 0
all_data = []
try:
while not coord.should_stop():
vid, features, audios, labels, nframes = sess.run(vals)
                label_index = np.where(labels)[0].tolist()
vid_num += 1
#print vid, features.shape, audios.shape, label_index, nframes
features_int = features.astype(np.uint8)
audios_int = audios.astype(np.uint8)
value_dict = {}
value_dict['video'] = vid
value_dict['feature'] = features_int
value_dict['audio'] = audios_int
value_dict['label'] = label_index
value_dict['nframes'] = nframes
all_data.append(value_dict)
except tf.errors.OutOfRangeError:
print('Finished extracting.')
finally:
coord.request_stop()
coord.join(threads)
        print(vid_num)
record_name = files_pattern.split('/')[-1].split('.')[0]
outputdir = target_dir
fn = '%s.pkl' % record_name
outp = open(os.path.join(outputdir, fn), 'wb')
cPickle.dump(all_data, outp, protocol=cPickle.HIGHEST_PROTOCOL)
outp.close()
if __name__ == '__main__':
record_dir = source_dir
record_files = os.listdir(record_dir)
for f in record_files:
record_path = os.path.join(record_dir, f)
main(record_path)
Subproject commit a4eb73b2fb64d8aab8499a1184edf4fc386f8268
PaddleNLP
============
Machine Translation
-------------------
Machine translation (MT) transforms text in a source natural language into a target natural language, and is a fundamental and important research direction in natural language processing. Amid the wave of globalization, the role machine translation plays in enabling communication across languages and cultures is self-evident. Its development has passed through stages including statistical machine translation and neural machine translation (NMT); only after NMT matured did machine translation see truly large-scale application. Early NMT was mostly built on recurrent neural networks (RNN), where each training time step depends on the previous one, making it hard to parallelize across time steps to speed up training. Non-RNN NMT architectures therefore emerged, such as those based on convolutional neural networks (CNN) and on the self-attention mechanism.
The Transformer implemented in this example is a machine translation model based on self-attention: it contains no RNN or CNN structure and learns contextual dependencies in language entirely through attention. Compared with RNN/CNN structures, it has lower computational complexity within a single layer, is easier to parallelize, and models long-range dependencies more readily, and it ultimately achieved the best translation results across several language pairs.
- [Transformer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/neural_machine_translation/transformer/README_cn.md)
Chinese Lexical Analysis
------------------------
Chinese word segmentation splits continuous natural-language text into a sequence of words that are semantically reasonable and complete. Because the word is the most basic unit of meaning in Chinese, segmentation is the foundation of text classification, sentiment analysis, information retrieval, and many other natural language processing tasks. Part-of-speech tagging assigns each word in a text a part of speech, such as noun, verb, adjective, or adverb. Named entity recognition (NER), also known as "proper name recognition," identifies entities with specific meaning in text, mainly person names, place names, organization names, and other proper nouns. We unify these three tasks into one joint task called lexical analysis, trained on massive annotated corpora with deep neural networks, and provide an end-to-end solution.
We call this joint Chinese lexical analysis solution LAC. LAC can be read as the initialism of Lexical Analysis of Chinese, or as the recursive acronym LAC Analyzes Chinese.
- [LAC](https://github.com/baidu/lac/blob/master/README.md)
Sentiment Analysis
------------------
Sentiment analysis automatically determines the sentiment polarity of subjective Chinese text (positive, negative, or neutral) along with a confidence score. It helps businesses understand users' consumption habits, analyze trending topics, and monitor public-opinion crises, providing strong support for decision making. Here we release the [model](http://ai.baidu.com/tech/nlp/sentiment_classify) used for sentiment analysis on the Baidu AI open platform for users to use.
- [Senta](https://github.com/baidu/Senta/blob/master/README.md)
Semantic Matching
-----------------
Many natural language processing scenarios require measuring the semantic similarity between two texts, a task usually called semantic matching. Examples include ranking search results by the similarity between the query and candidate documents, computing text-to-text similarity for deduplication, and matching candidate answers to questions in automatic question answering.
The DAM (Deep Attention Matching Network) released here is work by Baidu's NLP department published at ACL 2018, used for response selection in multi-turn conversations of retrieval-based chatbots. Inspired by Transformer, DAM's network structure is based entirely on the attention mechanism: it uses stacked self-attention to learn semantic representations of the response and the context at multiple granularities, then uses cross-attention to capture the relevance between response and context. It outperforms other models on two large-scale multi-turn dialogue datasets.
- [Deep Attention Matching Network](https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/deep_attention_matching_net)
AnyQ
----
The [AnyQ](https://github.com/baidu/AnyQ) (ANswer Your Questions) open-source project mainly comprises a question-answering framework for FAQ collections and the text semantic matching tool SimNet. The QA framework takes a configuration-driven, plugin-based design in which every capability is added as a plugin; more than 20 plugins are currently available. Developers can use AnyQ to quickly build and customize FAQ question-answering systems for specific business scenarios and to accelerate iteration and upgrades.
SimNet is a semantic matching framework developed in-house by Baidu's NLP department in 2013 and widely deployed across Baidu products. It includes core network structures such as BOW, CNN, RNN, and MM-DNN, and it also integrates mainstream academic semantic matching models such as MatchPyramid, MV-LSTM, and K-NRM. Models built with SimNet can easily be plugged into AnyQ to strengthen its semantic matching capability.
- [SimNet in PaddlePaddle Fluid](https://github.com/baidu/AnyQ/blob/master/tools/simnet/train/paddle/README.md)
Machine Reading Comprehension
-----------------------------
Machine reading comprehension (MRC) is one of the core tasks in natural language processing (NLP); the ultimate goal is for machines to read text the way humans do, distilling its information and answering related questions. The broad adoption of deep learning in NLP has greatly improved machine reading comprehension in recent years, but current research still relies on artificially constructed datasets and answers relatively simple questions, leaving a clear gap from the data humans handle. Large-scale, real-world training data is therefore urgently needed to push MRC further.
The Baidu reading comprehension dataset is a real-world dataset open-sourced by Baidu's NLP department: all questions and passages come from actual data (Baidu Search and the Baidu Zhidao Q&A community), and the answers were written by humans. Each question corresponds to multiple answers; the dataset contains 200k questions, 1000k passages, and 420k answers, making it currently the largest Chinese MRC dataset. Baidu also open-sourced the corresponding reading comprehension model, DuReader, which adopts the commonly used layered network architecture: it captures the interaction between question and passage with a bidirectional attention mechanism to produce query-aware passage representations, and finally predicts the answer span from those representations with a pointer network.
- [DuReader in PaddlePaddle Fluid](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/machine_reading_comprehension/README.md)
Subproject commit dc1af6a83dd1372055158ac6d17f6d14b3a0f0f8
Subproject commit 57b93859aa070ae6d96f10a470b1bdf2cfaea052
PaddleRL
============
Reinforcement Learning
----------------------
Reinforcement learning has become an increasingly important direction in machine learning in recent years. Combined with deep learning, the resulting deep reinforcement learning (DRL) has produced many astonishing achievements. AlphaGo, famous for defeating top professional Go players, is a typical application of DRL; beyond games, applications include robotics, natural language processing, and more.
The seminal work in deep reinforcement learning was its success on Atari video games: the model takes raw video frames as high-dimensional input and predicts the next action end-to-end from the image content. The model used is known as the Deep Q-Network (DQN). This example uses the flexible PaddlePaddle Fluid framework to implement DQN and its variants and evaluates them on Atari games.
- [DeepQNetwork](https://github.com/PaddlePaddle/models/blob/develop/PaddleRL/DeepQNetwork/README_cn.md)
PaddleRec
============
Personalized Recommendation
---------------------------
Recommender systems play an ever larger role in today's internet services: most e-commerce systems, social networks, ad recommendation, and search engines use some form of personalized recommendation to help users quickly find what they are looking for.
In industrial-grade recommender systems, the recommendation strategy is usually divided into multiple modules executed in series. Taking a news recommender system as an example, several stages can apply deep learning, such as automatic news tagging, personalized news recall, and personalized matching and ranking. PaddlePaddle provides complete support for training recommendation algorithms and offers a variety of model configurations to choose from.
- [TagSpace](https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/tagspace)
- [GRU4Rec](https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/gru4rec)
- [SequenceSemanticRetrieval](https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/ssr)
- [DeepCTR](https://github.com/PaddlePaddle/models/blob/develop/PaddleRec/ctr/README.cn.md)
- [Multiview-Simnet](https://github.com/PaddlePaddle/models/tree/develop/PaddleRec/multiview_simnet)
PaddleSpeech
============
Speech Recognition
------------------
Automatic speech recognition (ASR) is the technology that transcribes the lexical content of human speech into text a computer can accept as input. Research on speech recognition went through a long period of exploration, and progress was slow after the HMM/GMM era until the rise of deep learning revived it. Using deep neural networks (DNN) as acoustic models achieved better performance than GMM across many speech recognition tasks, making ASR one of the most successful application areas of deep learning. With steadily improving recognition accuracy, more and more speech-technology products have landed, such as voice input methods and smart-home devices exemplified by smart speakers; voice-based interaction is profoundly changing human life.
Unlike [DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech), where a deep learning model directly predicts words end-to-end, this example stays closer to the traditional speech recognition pipeline: it uses phonemes as the modeling unit and focuses on training the acoustic model in speech recognition, using [kaldi](http://www.kaldi-asr.org) for audio feature extraction and label alignment, and integrating kaldi's decoder to complete decoding.
- [DeepASR](https://github.com/PaddlePaddle/models/blob/develop/PaddleSpeech/DeepASR/README_cn.md)
(Diffs for 5 more files in this commit are collapsed.)