diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 0b5e83efef6efc60f9f0476747aa107994c64051..52af3ce51ba67c2b58c5e79c18c8d554e3c4b68c 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -11,7 +11,7 @@ paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None,
 paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6'))
 paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2'))
 paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '78e512cabeda9c7f42cb7c7e88967ae7'))
+paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
 paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45'))
 paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
 paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
diff --git a/paddle/fluid/framework/details/async_ssa_graph_executor.h b/paddle/fluid/framework/details/async_ssa_graph_executor.h
index 7d7296772d847e37e604535a281285585beec811..6aaf8f9a165f2eae3a64874e60084e4d9bdbc182 100644
--- a/paddle/fluid/framework/details/async_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/async_ssa_graph_executor.h
@@ -14,7 +14,9 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "ThreadPool.h"
diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc
index a86542efec2118bad62fe7cb49620d5004373fac..92b69334b8e8b9ac895d550b4a653668eee82906 100644
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <glog/logging.h>
 #include <memory>
+#include <utility>
 
 #include "paddle/fluid/framework/details/memory_optimize_helper.h"
 #include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h
index 8cb57ad67490c666424e8e6b07094051300431e4..9c807560f5c581f17881a8c5c38e88c6b176b15b 100644
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <vector>
 
diff --git a/paddle/fluid/framework/details/exception_holder.h b/paddle/fluid/framework/details/exception_holder.h
index 77ca03b86e6aeb4851063635197ce2d03541810a..f8fd395bd9cc1e569bf7789e6a3adc63b00716ac 100644
--- a/paddle/fluid/framework/details/exception_holder.h
+++ b/paddle/fluid/framework/details/exception_holder.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 
 #include "glog/logging.h"
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index 109037c3e6bc4af9d39f31d8f6cca4f185c2435f..8e4f04972108355c2931cfb04ec3647ad94278c0 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -13,7 +13,10 @@
 // limitations under the License.
 #include <algorithm>
 #include <fstream>
+#include <memory>
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -167,10 +170,6 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilderBase::ApplyImpl(
 
   bool is_forwarding = true;
   bool insert_collection_ops = NeedCollectiveOps();
-  if (strategy_.async_mode_) {
-    // async mode did not need to merge gradient
-    insert_collection_ops = false;
-  }
 
   for (ir::Node *node : sorted_ops) {
     if (DealWithSpecialOp(&result, node)) {
@@ -749,10 +748,6 @@ bool DistSSAGraphBuilder::DealWithSpecialOp(ir::Graph *result,
                                             ir::Node *node) const {
   bool insert_op = false;
   if (OpHaveRole(*node, OpRole::kRPC)) {
-    // in async_mode, each graph will send it's own gradient.
-    if (strategy_.async_mode_ && node->Op()->Type() == "send") {
-      return false;
-    }
     int op_dev_id = CreateRPCOp(result, node);
     PADDLE_ENFORCE(op_dev_id != -1,
                    "Can not schedule the RPC operator to the right place.");
@@ -768,11 +763,6 @@ bool DistSSAGraphBuilder::DealWithSpecialOp(ir::Graph *result,
     insert_op = true;
     need_broadcast_var_ = true;
   } else if (OpHaveRole(*node, OpRole::kDist)) {
-    // in async_mode, each graph will send it's own gradient, do not need to
-    // merge gradient.
-    if (strategy_.async_mode_ && node->Op()->Type() != "concat") {
-      return false;
-    }
     int op_dev_id = CreateDistTrainOp(result, node);
     if (node->Op()->Type() == "concat") {
       // the input(block of parameter) of concat is on different device,
@@ -844,7 +834,7 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const {
   }
   auto recv_param_grad = boost::get<std::vector<std::string>>(
       node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
-  if (recv_param_grad.size() == 2U && !strategy_.async_mode_) {
+  if (recv_param_grad.size() == 2U) {
     op_dev_id = GetVarDeviceID(recv_param_grad[1]);
     VLOG(10) << "recv param " << recv_param_grad[0]
              << " get grad place: " << recv_param_grad[1]
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.h b/paddle/fluid/framework/details/multi_devices_graph_pass.h
index 377ba50fccf4abe2ee7c894d64b4728387efcbe4..f7ec9d28de91e5f3ffd4ed3268c1640c0e8991e6 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h
@@ -14,7 +14,10 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
index 923e940884555ae71ac8047be1a181531d21f356..778bbab505772608ef7df86de476c15d5ff3f76f 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
@@ -16,7 +16,9 @@
 #include <deque>
 #include <list>
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <unordered_set>
 #include <utility>
 #include <vector>
 
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index ae7cd800adb5f14c1f480bc9fa4bb49204007c9d..6c5f246f95b97715989fea2b838d6a23c9c3bbea 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <algorithm>
 #include <string>
 #include <tuple>
+#include <utility>
 #include <vector>
 
 #include "paddle/fluid/framework/ir/graph_helper.h"
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index 987f715066306e6ff74376cc577ed1f679022be3..9a9f4e08fe1ad4311d1ffda8095eea51f76b407c 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <unordered_set>
 #include <vector>
diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h
index 6cf0ec2937935c01759ad36cd22ad19695f5447d..4b400e72a4cacd3848b57ac3ba2b3ef5f9a9a9c4 100644
--- a/paddle/fluid/framework/reader.h
+++ b/paddle/fluid/framework/reader.h
@@ -16,6 +16,7 @@
 
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "paddle/fluid/framework/ddim.h"
diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc
index 86a140f15219001126283aa8b3f76d72fddb28fc..c994c6f642d286d9b52ada667058b064ff242ce6 100644
--- a/paddle/fluid/operators/conv_transpose_op.cc
+++ b/paddle/fluid/operators/conv_transpose_op.cc
@@ -127,6 +127,12 @@ void Conv2DTransposeOpMaker::Make() {
            "output feature channels,"
            "H is the height of the filter, and W is the width of the filter. "
            "We enforce groups number == 1 in the convolution transpose scenario.");
+  AddInput("Bias",
+           "(Tensor) Bias to be added to each output of filter application. "
+           "The format of output tensor is X (one-dimensional) of size equal "
Only used with MKL-DNN.") + .AsDispensable(); + AddOutput("Output", "(Tensor) The output tensor of convolution transpose operator. " "The format of output tensor is also NCHW."); diff --git a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h index fe3f2f403173396a2b3d53ee14d48b91daa74224..2b7cb16bc7353961864603a5f25331d67e7167ad 100644 --- a/paddle/fluid/operators/reader/blocking_queue.h +++ b/paddle/fluid/operators/reader/blocking_queue.h @@ -16,6 +16,7 @@ #include // NOLINT #include +#include #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h index eeba330d66ea6603279e5f75db4a58e343de72e2..be044085f1435089b3fb736df684358136ea7c10 100644 --- a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h +++ b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include "paddle/fluid/framework/ddim.h" diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 84beb37c1d995161c473c3d8c11402bee0ae7319..2ebaab3b1024878e28ae7064bfc5c3d1d091ad94 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -104,6 +104,7 @@ class ParallelExecutor(object): main_program = main_program if main_program is not None \ else framework.default_main_program() + self._compiled_program = compiler.CompiledProgram(main_program) self._compiled_program.with_data_parallel( loss_name=loss_name, diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py index 9bcdb7b2a975b648471714ab628caf91b6b6f3a9..cc72df51f1e5c0968921c206a59cce5239fe5a83 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py @@ -15,36 +15,22 @@ from __future__ import print_function import unittest +import numpy as np +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest -from paddle.fluid.tests.unittests.test_conv2d_transpose_op import TestConv2dTransposeOp, TestWithPad, TestWithStride +from paddle.fluid.tests.unittests.test_conv2d_transpose_op import conv2dtranspose_forward_naive, TestConv2dTransposeOp -class TestMKLDNN(TestConv2dTransposeOp): - def init_op_type(self): - self.is_test = True - self.use_mkldnn = True - self.data_format = "NCHW" - self.op_type = "conv2d_transpose" - self._cpu_only = True - - def test_check_grad(self): - return +def conv2d_bias_naive(out, bias): + _, out_c, _, _ = out.shape - def test_check_grad_no_input(self): - return - - def test_check_grad_no_filter(self): - return + for l in range(out_c): + out[:, l, :, :] = out[:, l, :, :] + bias[l] + return out -class TestMKLDNNWithPad(TestWithPad): - def init_op_type(self): - self.is_test = True - self.use_mkldnn = True - self.data_format = "NCHW" - self.op_type = "conv2d_transpose" - self._cpu_only = True - +class TestConv2dTransposeMKLDNNOp(TestConv2dTransposeOp): def test_check_grad(self): return @@ -54,24 +40,64 @@ class TestMKLDNNWithPad(TestWithPad): def test_check_grad_no_filter(self): return - -class TestMKLDNNWithStride(TestWithStride): def init_op_type(self): - self.is_test = True - self.use_mkldnn = True self.data_format = "NCHW" self.op_type = "conv2d_transpose" self._cpu_only = True - 
-    def test_check_grad(self):
-        return
-
-    def test_check_grad_no_input(self):
-        return
-
-    def test_check_grad_no_filter(self):
-        return
-
-
-if __name__ == '__main__':
-    unittest.main()
+    def init_test_case(self):
+        self.use_mkldnn = True
+        self.is_test = True
+        self.pad = [0, 0]
+        self.fuse_bias = False
+        self.bias_size = None
+        self.fuse_relu = False
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.groups = 1
+
+    def setUp(self):
+        TestConv2dTransposeOp.setUp(self)
+
+        output = self.outputs['Output']
+
+        if self.fuse_bias and self.bias_size is not None:
+            bias = np.random.random(self.bias_size).astype(self.dtype)
+            output = conv2d_bias_naive(output, bias)
+            output = output.astype(self.dtype)
+            self.attrs['fuse_bias'] = self.fuse_bias
+            self.inputs['Bias'] = OpTest.np_dtype_to_fluid_dtype(bias)
+
+        if self.fuse_relu:
+            output = np.maximum(output, 0).astype(self.dtype)
+
+        self.attrs['fuse_bias'] = self.fuse_bias
+        self.attrs['fuse_relu'] = self.fuse_relu
+
+        self.outputs['Output'] = output
+
+
+class TestMKLDNNFuseBias(TestConv2dTransposeMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dTransposeMKLDNNOp.init_test_case(self)
+        self.pad = [1, 1]
+        self.fuse_bias = True
+        self.bias_size = [6]
+
+
+class TestMKLDNNWithPad(TestConv2dTransposeMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dTransposeMKLDNNOp.init_test_case(self)
+        self.pad = [1, 1]
+        self.input_size = [2, 3, 10, 10]
+
+
+class TestMKLDNNWithStride(TestConv2dTransposeMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dTransposeMKLDNNOp.init_test_case(self)
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.input_size = [2, 3, 6, 6]  # NCHW
diff --git a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py
index 4fbda407f12f5b14c847970317c36333d30390d6..5e77ce9b811bc0474f1e0950e15dedf013dcb4ea 100644
--- a/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py
@@ -178,8 +178,8 @@ class TestAsyncSSAGraphExecutor(unittest.TestCase):
                 main_program=fluid.Program(), startup_program=fluid.Program()):
             test()
 
-        assert int(step_list[0] / 2) == int(step_list[1])
-        assert int(step_list[1] / 2) == int(step_list[2])
+        assert abs(int(step_list[0] / 2) - int(step_list[1])) < 5
+        assert abs(int(step_list[1] / 2) - int(step_list[2])) < 5
 
 
 if __name__ == "__main__":
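
For reference, the fuse_bias/fuse_relu post-processing that the new MKL-DNN test models is just a per-channel bias add followed by an optional ReLU. Below is a minimal NumPy sketch of that arithmetic, not part of the patch; fused_reference and conv_out are hypothetical names, with conv_out standing in for the plain conv2d_transpose output:

    import numpy as np

    def fused_reference(conv_out, bias=None, relu=False):
        # conv_out: NCHW output of the plain (unfused) conv2d_transpose.
        out = conv_out.copy()
        if bias is not None:
            # Per-output-channel bias, broadcast over N, H and W;
            # the same arithmetic as conv2d_bias_naive in the test above.
            out = out + bias.reshape(1, -1, 1, 1)
        if relu:
            # fuse_relu clamps negative activations to zero.
            out = np.maximum(out, 0)
        return out

    # Example: batch of 2, 6 output channels, 5x5 spatial size.
    conv_out = np.random.random((2, 6, 5, 5)).astype('float32')
    bias = np.random.random(6).astype('float32')
    expected = fused_reference(conv_out, bias, relu=True)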