提交 f3af90d1 编写于 作者: N nhzlx

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into trt_dy_lib

test=develop
...@@ -72,7 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON) ...@@ -72,7 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF) option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_FAST_MATH "Make use of fast math library" OFF) option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
# PY_VERSION # PY_VERSION
if(NOT PY_VERSION) if(NOT PY_VERSION)
......
...@@ -178,6 +178,7 @@ paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, k ...@@ -178,6 +178,7 @@ paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, k
paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)) paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)) paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Preprocessor.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) paddle.fluid.layers.Preprocessor.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
......
...@@ -12,6 +12,5 @@ endif(NOT WIN32) ...@@ -12,6 +12,5 @@ endif(NOT WIN32)
if(WITH_INFERENCE) if(WITH_INFERENCE)
# NOTE: please add subdirectory inference at last. # NOTE: please add subdirectory inference at last.
add_subdirectory(inference) add_subdirectory(inference)
add_subdirectory(train)
endif() endif()
add_subdirectory(train)
...@@ -27,9 +27,6 @@ void SetConfig(AnalysisConfig *cfg) { ...@@ -27,9 +27,6 @@ void SetConfig(AnalysisConfig *cfg) {
cfg->device = 0; cfg->device = 0;
cfg->enable_ir_optim = true; cfg->enable_ir_optim = true;
cfg->specify_input_name = true; cfg->specify_input_name = true;
#ifdef PADDLE_WITH_MKLDNN
cfg->_use_mkldnn = true;
#endif
} }
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) { void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <cub/cub.cuh> // NOLINT #include <cub/cub.cuh> // NOLINT
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -293,7 +294,12 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y, ...@@ -293,7 +294,12 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y,
} }
auto x_data = x.data<Tx>(); auto x_data = x.data<Tx>();
auto y_data = y->mutable_data<Ty>(x.place()); auto y_data = y->mutable_data<Ty>(x.place());
if (reduce_num == 1) return; if (reduce_num == 1) {
auto out_dims = y->dims();
framework::TensorCopy(x, y->place(), y);
y->Resize(out_dims);
return;
}
#define CUB_BLOCK_DIM_CASE(block_dim) \ #define CUB_BLOCK_DIM_CASE(block_dim) \
case block_dim: { \ case block_dim: { \
......
...@@ -600,7 +600,7 @@ EOF ...@@ -600,7 +600,7 @@ EOF
if [[ ${WITH_GPU} == "ON" ]]; then if [[ ${WITH_GPU} == "ON" ]]; then
NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} || true" NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} || true"
else else
NCCL_DEPS="" NCCL_DEPS="true"
fi fi
if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then
......
...@@ -30,7 +30,8 @@ from ..unique_name import generate as unique_name ...@@ -30,7 +30,8 @@ from ..unique_name import generate as unique_name
__all__ = [ __all__ = [
'data', 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer', 'data', 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer',
'random_data_generator', 'py_reader', 'Preprocessor', 'load' 'random_data_generator', 'py_reader', 'create_py_reader_by_data',
'Preprocessor', 'load'
] ]
...@@ -470,6 +471,158 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): ...@@ -470,6 +471,158 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
return monkey_patch_reader_methods(main_prog_var) return monkey_patch_reader_methods(main_prog_var)
def _py_reader(capacity,
shapes,
dtypes,
lod_levels=None,
name=None,
use_double_buffer=True,
feed_list=None):
if feed_list is not None:
if not isinstance(feed_list, list):
raise TypeError("feed_list should be a list of Variable"
" instead of " + str(type(feed_list)))
lod_levels = []
dtypes = []
shape_concat = []
ranks = []
shapes = []
for data in feed_list:
dtypes.append(data.dtype)
shape_concat.extend(data.shape)
ranks.append(len(data.shape))
shapes.append(data.shape)
lod_levels.append(data.lod_level)
else:
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
ranks = []
for shape in shapes:
shape_concat.extend(shape)
ranks.append(len(shape))
if lod_levels is None:
lod_levels = [0] * len(shapes)
if name is None:
queue_name = unique_name('lod_tensor_blocking_queue')
reader_name = unique_name('create_py_reader')
double_buffer_name = unique_name('double_buffer')
else:
queue_name = "_".join([name, "queue"])
reader_name = "_".join([name, "reader"])
double_buffer_name = "_".join([name, "double_buffer"])
var = global_scope().var(queue_name)
feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, shapes)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=reader_name)
startup_blk.append_op(
type='create_py_reader',
inputs={'blocking_queue': [queue_name]},
outputs={'Out': [startup_var]},
attrs={
'shape_concat': shape_concat,
'lod_levels': lod_levels,
'ranks': ranks
})
startup_var.desc.set_dtypes(dtypes)
startup_var.persistable = True
main_prog_var = _copy_reader_var_(default_main_program().current_block(),
startup_var)
reader = monkey_patch_reader_methods(main_prog_var)
if use_double_buffer:
double_buffer_reader = double_buffer(reader, name=double_buffer_name)
# we return a double buffer reader. However, the reset method comes from
# py_reader.
double_buffer_reader.reset = reader.reset
reader = double_buffer_reader
# monkey patch py_reader special methods
reader.queue = feed_queue
current_reset_method = reader.reset
reader.thread = None
reader.tensor_provider = None
reader.exited = False
def start_provide_thread(func):
def __provider_thread__():
for tensors in func():
array = core.LoDTensorArray()
for item in tensors:
if not isinstance(item, core.LoDTensor):
tmp = core.LoDTensor()
tmp.set(item, core.CPUPlace())
item = tmp
array.append(item)
if reader.exited:
break
feed_queue.push(array)
if reader.exited:
break
feed_queue.close()
reader.thread = threading.Thread(target=__provider_thread__)
reader.thread.daemon = True
reader.thread.start()
def __set_tensor_provider__(func):
reader.tensor_provider = func
def __set_paddle_reader__(paddle_reader):
with program_guard(Program(), Program()):
actual_feed_list = feed_list
if actual_feed_list is None:
actual_feed_list = []
counter = 0
for dtype, shape, lod_level in zip(dtypes, shapes, lod_levels):
name = str(counter)
actual_feed_list.append(
data(
name=name,
dtype=dtype,
shape=shape,
lod_level=lod_level))
counter += 1
feeder = DataFeeder(
feed_list=actual_feed_list, place=core.CPUPlace())
paddle_reader = feeder.decorate_reader(
paddle_reader, multi_devices=False)
def __tensor_provider__():
for slots in paddle_reader():
yield [slots[str(idx)] for idx in six.moves.xrange(counter)]
__set_tensor_provider__(__tensor_provider__)
def __reset__():
current_reset_method()
if reader.thread is not None and reader.tensor_provider is not None:
reader.exited = True
reader.thread.join()
reader.exited = False
def __start__():
start_provide_thread(reader.tensor_provider)
reader.reset = __reset__
reader.decorate_tensor_provider = __set_tensor_provider__
reader.decorate_paddle_reader = __set_paddle_reader__
reader.start = __start__
return reader
def py_reader(capacity, def py_reader(capacity,
shapes, shapes,
dtypes, dtypes,
...@@ -594,128 +747,72 @@ def py_reader(capacity, ...@@ -594,128 +747,72 @@ def py_reader(capacity,
>>> except fluid.core.EOFException: >>> except fluid.core.EOFException:
>>> test_reader.reset() >>> test_reader.reset()
""" """
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] return _py_reader(
shape_concat = [] capacity=capacity,
ranks = [] shapes=shapes,
dtypes=dtypes,
for shape in shapes: lod_levels=lod_levels,
shape_concat.extend(shape) name=name,
ranks.append(len(shape)) use_double_buffer=use_double_buffer)
if lod_levels is None:
lod_levels = [0] * len(shapes)
if name is None:
queue_name = unique_name('lod_tensor_blocking_queue')
reader_name = unique_name('create_py_reader')
double_buffer_name = unique_name('double_buffer')
else:
queue_name = "_".join([name, "queue"])
reader_name = "_".join([name, "reader"])
double_buffer_name = "_".join([name, "double_buffer"])
var = global_scope().var(queue_name)
feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, shapes)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=reader_name)
startup_blk.append_op(
type='create_py_reader',
inputs={'blocking_queue': [queue_name]},
outputs={'Out': [startup_var]},
attrs={
'shape_concat': shape_concat,
'lod_levels': lod_levels,
'ranks': ranks
})
startup_var.desc.set_dtypes(dtypes)
startup_var.persistable = True
main_prog_var = _copy_reader_var_(default_main_program().current_block(),
startup_var)
reader = monkey_patch_reader_methods(main_prog_var)
if use_double_buffer:
double_buffer_reader = double_buffer(reader, name=double_buffer_name)
# we return a double buffer reader. However, the reset method comes from
# py_reader.
double_buffer_reader.reset = reader.reset
reader = double_buffer_reader
# monkey patch py_reader special methods
reader.queue = feed_queue
current_reset_method = reader.reset
reader.thread = None
reader.tensor_provider = None
reader.exited = False
def start_provide_thread(func):
def __provider_thread__():
for tensors in func():
array = core.LoDTensorArray()
for item in tensors:
if not isinstance(item, core.LoDTensor):
tmp = core.LoDTensor()
tmp.set(item, core.CPUPlace())
item = tmp
array.append(item)
if reader.exited: def create_py_reader_by_data(capacity,
break feed_list,
feed_queue.push(array) name=None,
if reader.exited: use_double_buffer=True):
break """
feed_queue.close() Create a Python reader for data feeding in Python
reader.thread = threading.Thread(target=__provider_thread__)
reader.thread.daemon = True
reader.thread.start()
def __set_tensor_provider__(func):
reader.tensor_provider = func
def __set_paddle_reader__(paddle_reader): This layer returns a Reader Variable.
with program_guard(Program(), Program()):
feed_list = []
counter = 0
for dtype, shape, lod_level in zip(dtypes, shapes, lod_levels):
name = str(counter)
feed_list.append(
data(
name=name,
dtype=dtype,
shape=shape,
lod_level=lod_level))
counter += 1
feeder = DataFeeder(feed_list=feed_list, place=core.CPUPlace())
paddle_reader = feeder.decorate_reader(
paddle_reader, multi_devices=False)
def __tensor_provider__(): Works much like py_reader except that it's input is feed_list
for slots in paddle_reader(): instead of shapes, dtypes and lod_levels
yield [slots[str(idx)] for idx in six.moves.xrange(counter)]
__set_tensor_provider__(__tensor_provider__) Args:
capacity(int): The buffer capacity maintained by :code:`py_reader`.
feed_list(list(Variable)): The data feed list.
name(basestring): The prefix Python queue name and Reader name. None will
be generated automatically.
use_double_buffer(bool): Whether use double buffer or not.
def __reset__(): Returns:
current_reset_method() Variable: A Reader from which we can get feeding data.
if reader.thread is not None and reader.tensor_provider is not None:
reader.exited = True
reader.thread.join()
reader.exited = False
def __start__(): Examples:
start_provide_thread(reader.tensor_provider)
reader.reset = __reset__ 1. The basic usage of :code:`py_reader` is as follows:
reader.decorate_tensor_provider = __set_tensor_provider__
reader.decorate_paddle_reader = __set_paddle_reader__
reader.start = __start__
return reader >>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>>
>>> image = fluid.layers.data(name='image', shape=[3,224,224], dtypes='float32')
>>> label = fluid.layers.data(name='label', shape=[1], dtypes='int64')
>>> reader = fluid.layers.create_py_reader_by_data(capacity=64, feed_list=[image, label])
>>> reader.decorate_paddle_reader(
>>> paddle.reader.shuffle(paddle.batch(mnist.train())
>>>
>>> img, label = fluid.layers.read_file(reader)
>>> loss = network(img, label) # some network definition
>>>
>>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
>>>
>>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
>>> for epoch_id in range(10):
>>> reader.start()
>>> try:
>>> while True:
>>> exe.run(fetch_list=[loss.name])
>>> except fluid.core.EOFException:
>>> reader.reset()
"""
return _py_reader(
capacity=capacity,
shapes=None,
dtypes=None,
lod_levels=None,
name=name,
use_double_buffer=use_double_buffer,
feed_list=feed_list)
def open_files(filenames, def open_files(filenames,
......
...@@ -53,13 +53,22 @@ def simple_fc_net(in_size, ...@@ -53,13 +53,22 @@ def simple_fc_net(in_size,
hidden_sizes, hidden_sizes,
batch_size, batch_size,
queue_capacity, queue_capacity,
use_double_buffer=False): use_double_buffer=False,
reader = fluid.layers.py_reader( use_feed_list=True):
capacity=queue_capacity, if use_feed_list:
shapes=[[-1, in_size], [-1, 1]], data = fluid.layers.data(name="data", dtype='float32', shape=[in_size])
lod_levels=[0, 0], label = fluid.layers.data(name='label', dtype='int64', shape=[1])
dtypes=['float32', 'int64'], reader = fluid.layers.create_py_reader_by_data(
use_double_buffer=False) capacity=queue_capacity,
use_double_buffer=False,
feed_list=[data, label])
else:
reader = fluid.layers.py_reader(
capacity=queue_capacity,
shapes=[[-1, in_size], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'],
use_double_buffer=False)
feed_queue = reader.queue feed_queue = reader.queue
reader = fluid.layers.batch(reader, batch_size=batch_size) reader = fluid.layers.batch(reader, batch_size=batch_size)
if use_double_buffer: if use_double_buffer:
...@@ -100,14 +109,16 @@ class TestPyReaderUsingExecutor(unittest.TestCase): ...@@ -100,14 +109,16 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
for use_parallel_executor in [False, True]: for use_parallel_executor in [False, True]:
for use_double_buffer in [False, True]: for use_double_buffer in [False, True]:
print('Test Parameters:'), for use_feed_list in [False, True]:
print({ print('Test Parameters:'),
'use_cuda': use_cuda, print({
'use_parallel_executor': use_parallel_executor, 'use_cuda': use_cuda,
'use_double_buffer': use_double_buffer 'use_parallel_executor': use_parallel_executor,
}) 'use_double_buffer': use_double_buffer,
self.main(use_cuda, use_parallel_executor, 'use_feed_list': use_feed_list
use_double_buffer) })
self.main(use_cuda, use_parallel_executor,
use_double_buffer, use_feed_list)
def random_reader(self): def random_reader(self):
def reader(): def reader():
...@@ -143,12 +154,14 @@ class TestPyReaderUsingExecutor(unittest.TestCase): ...@@ -143,12 +154,14 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
def main(self, def main(self,
use_cuda=True, use_cuda=True,
use_parallel_executor=False, use_parallel_executor=False,
use_double_buffer=False): use_double_buffer=False,
use_feed_list=False):
assert not use_cuda or use_cuda and core.is_compiled_with_cuda() assert not use_cuda or use_cuda and core.is_compiled_with_cuda()
self.use_cuda = use_cuda self.use_cuda = use_cuda
self.use_parallel_executor = use_parallel_executor self.use_parallel_executor = use_parallel_executor
self.use_double_buffer = use_double_buffer self.use_double_buffer = use_double_buffer
self.use_feed_list = use_feed_list
startup_program = fluid.Program() startup_program = fluid.Program()
main_program = fluid.Program() main_program = fluid.Program()
...@@ -160,7 +173,8 @@ class TestPyReaderUsingExecutor(unittest.TestCase): ...@@ -160,7 +173,8 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
hidden_sizes=self.hidden_sizes, hidden_sizes=self.hidden_sizes,
batch_size=self.batch_size, batch_size=self.batch_size,
queue_capacity=self.queue_capacity, queue_capacity=self.queue_capacity,
use_double_buffer=self.use_double_buffer) use_double_buffer=self.use_double_buffer,
use_feed_list=self.use_feed_list)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
......
...@@ -243,5 +243,87 @@ class TestKeepDimReduceSumMultiAxises(OpTest): ...@@ -243,5 +243,87 @@ class TestKeepDimReduceSumMultiAxises(OpTest):
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out')
class TestReduceSumWithDimOne(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")}
self.attrs = {'dim': [1, 2], 'keep_dim': True}
self.outputs = {
'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']),
keepdims=True)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceSumWithNumelOne(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': False}
self.outputs = {
'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']),
keepdims=False)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceMeanWithDimOne(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': False}
self.outputs = {
'Out': self.inputs['X'].mean(
axis=tuple(self.attrs['dim']), keepdims=False)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceMeanWithNumelOne(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': True}
self.outputs = {
'Out': self.inputs['X'].mean(
axis=tuple(self.attrs['dim']), keepdims=True)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceAll(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((1, 1, 1)).astype("float64")}
self.attrs = {'reduce_all': True, 'keep_dim': False}
self.outputs = {'Out': self.inputs['X'].sum()}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册