diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc
index 5d1851fb85aa2fa04f59ca440aafed14644d0c06..637c3b9107a7d63aafd97077f63fa93f58e1b470 100644
--- a/paddle/phi/api/lib/api_custom_impl.cc
+++ b/paddle/phi/api/lib/api_custom_impl.cc
@@ -1014,5 +1014,135 @@ std::vector<Tensor> meshgrid_grad_impl(
   return api_output;
 }
 
+std::vector<Tensor> multi_dot_grad_impl(const std::vector<Tensor>& x,
+                                        const Tensor& out_grad) {
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(x, out_grad);
+    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+
+  VLOG(6) << "multi_dot_grad API kernel key: [" << kernel_backend << ", "
+          << kernel_layout << ", " << kernel_data_type << "]";
+  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      "multi_dot_grad", {kernel_backend, kernel_layout, kernel_data_type});
+  VLOG(6) << "multi_dot_grad API kernel: " << kernel;
+
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+
+  auto input_x_vec = PrepareData(x, kernel.InputAt(0), {});
+  std::vector<const phi::DenseTensor*> input_x(input_x_vec->size());
+  for (size_t i = 0; i < input_x.size(); ++i) {
+    input_x[i] = &input_x_vec->at(i);
+  }
+  auto input_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
+
+  size_t out_number = input_x.size();
+  std::vector<Tensor> api_output;
+  auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
+
+  auto x_meta_vec = MakeMetaTensor(input_x);
+  std::vector<phi::MetaTensor*> x_metas(x_meta_vec.size());
+  for (size_t i = 0; i < x_meta_vec.size(); ++i) {
+    x_metas[i] = &x_meta_vec[i];
+  }
+
+  std::vector<phi::MetaTensor> meta_outs;
+  meta_outs.reserve(out_number);
+  std::vector<phi::MetaTensor*> meta_out_ptrs;
+  meta_out_ptrs.reserve(out_number);
+  for (size_t i = 0; i < out_number; ++i) {
+    meta_outs.push_back(kernel_out[i]);
+    meta_out_ptrs.push_back(&meta_outs.back());
+  }
+
+  phi::MultiDotGradInferMeta(
+      x_metas, MakeMetaTensor(*input_out_grad), meta_out_ptrs);
+
+  using kernel_signature = void (*)(const platform::DeviceContext&,
+                                    const std::vector<const phi::DenseTensor*>&,
+                                    const phi::DenseTensor&,
+                                    std::vector<phi::DenseTensor*>&);
+  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+  (*kernel_fn)(*dev_ctx, input_x, *input_out_grad, kernel_out);
+
+  return api_output;
+}
+
+std::vector<Tensor> multiplex_grad_impl(const std::vector<Tensor>& inputs,
+                                        const Tensor& ids,
+                                        const Tensor& out_grad) {
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
+    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+
+  VLOG(6) << "multiplex_grad API kernel key: [" << kernel_backend << ", "
+          << kernel_layout << ", " << kernel_data_type << "]";
+  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      "multiplex_grad", {kernel_backend, kernel_layout, kernel_data_type});
+  VLOG(6) << "multiplex_grad API kernel: " << kernel;
+
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+
+  auto input_ids = PrepareData(ids, kernel.InputAt(0), {});
+  auto input_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
+
+  auto out_number = inputs.size();
+  std::vector<Tensor> api_output;
+  auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
+
+  std::vector<phi::MetaTensor> meta_outs;
+  meta_outs.reserve(out_number);
+  std::vector<phi::MetaTensor*> meta_out_ptrs;
+  meta_out_ptrs.reserve(out_number);
+  for (size_t i = 0; i < out_number; ++i) {
+    meta_outs.push_back(kernel_out[i]);
+    meta_out_ptrs.push_back(&meta_outs.back());
+  }
+
+  phi::MultiplexGradInferMeta(MakeMetaTensor(*input_ids),
+                              MakeMetaTensor(*input_out_grad),
+                              meta_out_ptrs);
+
+  using kernel_signature = void (*)(const platform::DeviceContext&,
+                                    const phi::DenseTensor&,
+                                    const phi::DenseTensor&,
+                                    std::vector<phi::DenseTensor*>&);
+  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+  (*kernel_fn)(*dev_ctx, *input_ids, *input_out_grad, kernel_out);
+
+  return api_output;
+}
+
 }  // namespace experimental
 }  // namespace paddle
diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h
index 80ace229316a92b3b190557bdee3fc70a2ebe2c4..0e360ce4a993f024dbf3eff4896b56f9ddf4fe60 100644
--- a/paddle/phi/api/lib/api_custom_impl.h
+++ b/paddle/phi/api/lib/api_custom_impl.h
@@ -62,6 +62,8 @@ std::vector<Tensor> split_impl(const Tensor& x,
                                const IntArray& num_or_sections,
                                const Scalar& axis);
 
+std::vector<Tensor> meshgrid_impl(const std::vector<Tensor>& inputs);
+
 std::tuple<Tensor, Tensor, Tensor> momentum_impl(
     const Tensor& param,
     const Tensor& grad,
@@ -109,9 +111,15 @@ Tensor real_grad_impl(const Tensor& x);
 std::vector<Tensor> stack_grad_impl(const std::vector<Tensor>& x,
                                     const Tensor& out_grad,
                                     int axis);
-std::vector<Tensor> meshgrid_impl(const std::vector<Tensor>& inputs);
 std::vector<Tensor> meshgrid_grad_impl(const std::vector<Tensor>& inputs,
                                        const std::vector<Tensor>& outputs_grad);
 
+std::vector<Tensor> multi_dot_grad_impl(const std::vector<Tensor>& x,
+                                        const Tensor& out_grad);
+
+std::vector<Tensor> multiplex_grad_impl(const std::vector<Tensor>& inputs,
+                                        const Tensor& ids,
+                                        const Tensor& out_grad);
+
 }  // namespace experimental
 }  // namespace paddle
diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc
index c0c50d68868ea95942386ccb1da9e190251525d4..efbf02e3314333f1e12a1b65856309822a3d2465 100644
--- a/paddle/phi/infermeta/backward.cc
+++ b/paddle/phi/infermeta/backward.cc
@@ -329,6 +329,38 @@ void MeshgridGradInferMeta(const std::vector<MetaTensor*>& inputs,
   }
 }
 
+void MultiDotGradInferMeta(const std::vector<MetaTensor*>& x,
+                           const MetaTensor& out_grad,
+                           std::vector<MetaTensor*> x_grad) {
+  PADDLE_ENFORCE_EQ(
+      x.size(),
+      x_grad.size(),
+      errors::InvalidArgument(
+          "Number of Inputs(X) should be equal with Outputs(X@Grad)."
+ "But received Inputs(X)' size = %d , Outputs(X@Grad)' size = %d.", + x.size(), + x_grad.size())); + for (size_t i = 0; i < x.size(); i++) { + if (x_grad[i] != nullptr) { + x_grad[i]->set_dims(x[i]->dims()); + x_grad[i]->share_lod(*x[i]); + } + } +} + +void MultiplexGradInferMeta(const MetaTensor& ids, + const MetaTensor& out_grad, + std::vector ins_grad) { + PADDLE_ENFORCE_NE( + ins_grad.empty(), + true, + errors::InvalidArgument("Output(X@Grad) should not be null.")); + auto dout_dim = out_grad.dims(); + for (auto in_grad : ins_grad) { + in_grad->set_dims(dout_dim); + } +} + void NllLossGradInferMeta(const MetaTensor& x, const MetaTensor& label, paddle::optional weight, diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h index ad375e609313da5ecaab48c1b5ff439fd80e170e..6e730c83d1d5065cf95331f460aa850c3127232b 100644 --- a/paddle/phi/infermeta/backward.h +++ b/paddle/phi/infermeta/backward.h @@ -155,6 +155,14 @@ void MeshgridGradInferMeta(const std::vector& inputs, const std::vector& outputs_grad, std::vector inputs_grad); +void MultiDotGradInferMeta(const std::vector& x, + const MetaTensor& out_grad, + std::vector x_grad); + +void MultiplexGradInferMeta(const MetaTensor& ids, + const MetaTensor& out_grad, + std::vector ins_grad); + void NllLossGradInferMeta(const MetaTensor& input, const MetaTensor& label, paddle::optional weight, diff --git a/paddle/phi/kernels/impl/multi_dot_kernel_impl.h b/paddle/phi/kernels/impl/multi_dot_kernel_impl.h index 0833e94fe2c189f00e6dadff3f108753d0f66221..039b056200fddfc239f724634d0307d8a793ad76 100644 --- a/paddle/phi/kernels/impl/multi_dot_kernel_impl.h +++ b/paddle/phi/kernels/impl/multi_dot_kernel_impl.h @@ -339,8 +339,8 @@ void MultiDotGradMatChainOrder(const Context& ctx, template void MultiDotGradKernel(const Context& ctx, - const DenseTensor& out_grad, const std::vector& x, + const DenseTensor& out_grad, std::vector x_grad) { auto ins = x; auto dout = out_grad; diff --git a/paddle/phi/kernels/multi_dot_grad_kernel.h b/paddle/phi/kernels/multi_dot_grad_kernel.h index e6d8ecd744e12a9b17bcd954eaf19093d8c694df..f495c7045207991aa51d109fd41600d71f9b76ca 100644 --- a/paddle/phi/kernels/multi_dot_grad_kernel.h +++ b/paddle/phi/kernels/multi_dot_grad_kernel.h @@ -20,8 +20,8 @@ namespace phi { template void MultiDotGradKernel(const Context& ctx, - const DenseTensor& out_grad, const std::vector& x, + const DenseTensor& out_grad, std::vector x_grad); } // namespace phi diff --git a/paddle/phi/ops/compat/multi_dot_sig.cc b/paddle/phi/ops/compat/multi_dot_sig.cc index 598cbd980f3cc5ece0168b9cbb4f91e654f3f8ab..2e05bd6d1557acc2a18b5a2c31ecb928bbb50ec3 100644 --- a/paddle/phi/ops/compat/multi_dot_sig.cc +++ b/paddle/phi/ops/compat/multi_dot_sig.cc @@ -19,7 +19,7 @@ namespace phi { KernelSignature MultiDotGradOpArgumentMapping( const ArgumentMappingContext& ctx) { return KernelSignature( - "multi_dot_grad", {GradVarName("Out"), "X"}, {}, {GradVarName("X")}); + "multi_dot_grad", {"X", GradVarName("Out")}, {}, {GradVarName("X")}); } } // namespace phi diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index fdc348f3a837771f174e8ac591f9830a63ec1ab0..949964b8281fdca3a9ba4e066f3891970cb321f0 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -5971,8 +5971,11 @@ def multiplex(inputs, index, name=None): print(res) # [array([[5., 6.], [3., 4.]], dtype=float32)] """ - if _non_static_mode(): + + if _in_legacy_dygraph(): return _C_ops.multiplex(index, inputs) + if in_dygraph_mode(): + 
+        return _C_ops.final_state_multiplex(inputs, index)
 
     helper = LayerHelper('multiplex', **locals())
     check_type(inputs, 'inputs', (list), 'multiplex')
diff --git a/python/paddle/fluid/tests/unittests/test_maxout_op.py b/python/paddle/fluid/tests/unittests/test_maxout_op.py
index fac400caacdab5d6bff009bd8e84a9b07d1cff0c..4bc7b09c71e6eba134b47b4513423e88690bf977 100644
--- a/python/paddle/fluid/tests/unittests/test_maxout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_maxout_op.py
@@ -21,6 +21,7 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.nn.functional as F
 from op_test import OpTest
+from paddle.fluid.framework import _test_eager_guard
 
 paddle.enable_static()
 np.random.seed(1)
@@ -38,6 +39,7 @@ def maxout_forward_naive(x, groups, channel_axis):
 class TestMaxOutOp(OpTest):
     def setUp(self):
         self.op_type = "maxout"
+        self.python_api = paddle.nn.functional.maxout
         self.dtype = 'float64'
         self.shape = [3, 6, 2, 4]
         self.groups = 2
@@ -55,10 +57,10 @@ class TestMaxOutOp(OpTest):
         pass
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', check_eager=True)
 
 
 class TestMaxOutOpAxis0(TestMaxOutOp):
@@ -144,6 +146,10 @@ class TestMaxoutAPI(unittest.TestCase):
         x_float32 = paddle.fluid.data(name='x_float32', shape=[2, 4, 6, 8])
         self.assertRaises(ValueError, F.maxout, x_float32, 2, 2)
 
+    def test_dygraph_final_state_api(self):
+        with _test_eager_guard():
+            self.test_dygraph_api()
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_multi_dot_op.py b/python/paddle/fluid/tests/unittests/test_multi_dot_op.py
index 8856624b4efc72577019b10ae7ef932a413daf1e..11c04363170766b21d768294d471d9034402d0ff 100644
--- a/python/paddle/fluid/tests/unittests/test_multi_dot_op.py
+++ b/python/paddle/fluid/tests/unittests/test_multi_dot_op.py
@@ -18,6 +18,7 @@ from op_test import OpTest, skip_check_grad_ci
 from numpy.linalg import multi_dot
 from op_test import OpTest
 import paddle
+from paddle.fluid.framework import _test_eager_guard
 
 paddle.enable_static()
 
@@ -27,6 +28,7 @@ paddle.enable_static()
 class TestMultiDotOp(OpTest):
     def setUp(self):
         self.op_type = "multi_dot"
+        self.python_api = paddle.linalg.multi_dot
         self.dtype = self.get_dtype()
         self.get_inputs_and_outputs()
 
@@ -40,11 +42,11 @@ class TestMultiDotOp(OpTest):
         self.outputs = {'Out': multi_dot([self.A, self.B])}
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
-        self.check_grad(['x0'], 'Out')
-        self.check_grad(['x1'], 'Out')
+        self.check_grad(['x0'], 'Out', check_eager=True)
+        self.check_grad(['x1'], 'Out', check_eager=True)
 
 
 #(A*B)*C
@@ -57,9 +59,9 @@ class TestMultiDotOp3Mat(TestMultiDotOp):
         self.outputs = {'Out': multi_dot([self.A, self.B, self.C])}
 
     def test_check_grad(self):
-        self.check_grad(['x0'], 'Out')
-        self.check_grad(['x1'], 'Out')
-        self.check_grad(['x2'], 'Out')
+        self.check_grad(['x0'], 'Out', check_eager=True)
+        self.check_grad(['x1'], 'Out', check_eager=True)
+        self.check_grad(['x2'], 'Out', check_eager=True)
 
 
 #A*(B*C)
@@ -72,9 +74,9 @@ class TestMultiDotOp3Mat2(TestMultiDotOp):
         self.outputs = {'Out': multi_dot([self.A, self.B, self.C])}
 
     def test_check_grad(self):
-        self.check_grad(['x0'], 'Out')
-        self.check_grad(['x1'], 'Out')
-        self.check_grad(['x2'], 'Out')
+        self.check_grad(['x0'], 'Out', check_eager=True)
+        self.check_grad(['x1'], 'Out', check_eager=True)
+        self.check_grad(['x2'], 'Out', check_eager=True)
 
 
 class TestMultiDotOp4Mat(TestMultiDotOp):
@@ -90,10 +92,10 @@ class TestMultiDotOp4Mat(TestMultiDotOp):
         self.outputs = {'Out': multi_dot([self.A, self.B, self.C, self.D])}
 
     def test_check_grad(self):
-        self.check_grad(['x0'], 'Out')
-        self.check_grad(['x1'], 'Out')
-        self.check_grad(['x2'], 'Out')
-        self.check_grad(['x3'], 'Out')
+        self.check_grad(['x0'], 'Out', check_eager=True)
+        self.check_grad(['x1'], 'Out', check_eager=True)
+        self.check_grad(['x2'], 'Out', check_eager=True)
+        self.check_grad(['x3'], 'Out', check_eager=True)
 
 
 class TestMultiDotOpFirst1D(TestMultiDotOp):
@@ -143,9 +145,9 @@ class TestMultiDotOp3MatLast1D(TestMultiDotOp3Mat):
         self.outputs = {'Out': multi_dot([self.A, self.B, self.C])}
 
     def test_check_grad(self):
-        self.check_grad(['x0'], 'Out')
-        self.check_grad(['x1'], 'Out')
-        self.check_grad(['x2'], 'Out')
+        self.check_grad(['x0'], 'Out', check_eager=True)
+        self.check_grad(['x1'], 'Out', check_eager=True)
+        self.check_grad(['x2'], 'Out', check_eager=True)
 
 
 class TestMultiDotOp4MatLast1D(TestMultiDotOp4Mat):
@@ -260,6 +262,10 @@ class APITestMultiDot(unittest.TestCase):
         expected_result = np.linalg.multi_dot([input_array1, input_array2])
         self.assertTrue(np.allclose(expected_result, out.numpy()))
 
+    def test_dygraph_final_state_api(self):
+        with _test_eager_guard():
+            self.test_dygraph_without_out()
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index 90283b632ef2bd385db293ca2879878146f7f5f0..a0efdaac8ff7c6cacbeccbb6d0a411c131a462d8 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -684,10 +684,10 @@ def maxout(x, groups, axis=1, name=None):
            #    [0.95313174 0.6228939  0.7129065  0.7087491 ]
            #    [0.7142536  0.88725346 0.61093384 0.38833922]]]]
     """
-
-    if in_dynamic_mode():
+    if _in_legacy_dygraph():
         return _C_ops.maxout(x, 'groups', groups, 'axis', axis)
-
+    if in_dygraph_mode():
+        return _C_ops.final_state_maxout(x, groups, axis)
     check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'maxout')
     if axis not in [1, -1, 3]:
         raise ValueError(
diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py
index 51df977c006441c802919ac2618216742ebbb2b5..253f27a439f7578706e877e73b327928a48805c9 100644
--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
@@ -2274,8 +2274,10 @@ def multi_dot(x, name=None):
         # [10, 7]
 
     """
-    if paddle.in_dynamic_mode():
+    if _in_legacy_dygraph():
         return _C_ops.multi_dot(x)
+    if in_dygraph_mode():
+        return _C_ops.final_state_multi_dot(x)
 
     check_type(x, 'x', (list, tuple), 'multi_dot')
     for id, item in enumerate(x):
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index 6df8c6efcc03dcc990d80a97a734e9aef854b58d..5ab85905ba61d058e7c73d961dcf76441bd843cd 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -1283,6 +1283,15 @@
     func : maximum
   backward : maximum_grad
 
+- api : maxout
+  args : (Tensor x, int groups, int axis)
+  output : Tensor(out)
+  infer_meta :
+    func : MaxOutInferMeta
+  kernel :
+    func : maxout
+  backward : maxout_grad
+
 - api : mean
   args : (Tensor x, int64_t[] dims={}, bool keep_dim=false)
   output : Tensor(out)
@@ -1359,6 +1368,15 @@
   invoke : momentum_impl(param, grad, velocity, learning_rate, master_param, mu, use_nesterov, regularization_method, regularization_coeff, multi_precision, rescale_grad)
   optional : master_param
 
+- api : multi_dot
+  args : (Tensor[] x)
+  output : Tensor
+  infer_meta :
+    func : MultiDotInferMeta
+  kernel :
+    func : multi_dot
+  backward : multi_dot_grad
+
 # multinomial
 - api : multinomial
   args : (Tensor x, int num_samples, bool replacement)
@@ -1368,6 +1386,16 @@
   kernel :
     func : multinomial
 
+- api : multiplex
+  args : (Tensor[] ins, Tensor ids)
+  output : Tensor
+  infer_meta :
+    func : MultiplexInferMeta
+  kernel :
+    func : multiplex
+    data_type : ins
+  backward : multiplex_grad
+
 - api : multiply
   args : (Tensor x, Tensor y)
   output : Tensor
diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py
index cc88cf01a012ce6076d19b1819d85dbf71795ff6..de30b45c31e947bdd08a1a2f864cce54c0dee533 100644
--- a/python/paddle/utils/code_gen/api_base.py
+++ b/python/paddle/utils/code_gen/api_base.py
@@ -600,7 +600,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
             if self.inputs['input_info'][param] == "const Tensor&":
                 kernel_args = kernel_args + "*" + PREFIX_TENSOR_NAME + param + ", "
             elif self.inputs['input_info'][
-                    input_name] == "const std::vector<Tensor>&":
+                    param] == "const std::vector<Tensor>&":
                 kernel_args = kernel_args + PREFIX_TENSOR_NAME + param + ", "
             else:
                 # do nothing
diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml
index 038097d72e3f171b2fb0483d29af6051046f13d1..db4e08792a43c21d71d73bbf913e424dc39edb9d 100644
--- a/python/paddle/utils/code_gen/backward.yaml
+++ b/python/paddle/utils/code_gen/backward.yaml
@@ -920,6 +920,16 @@
   kernel :
     func : maximum_grad
 
+- backward_api : maxout_grad
+  forward : maxout(Tensor x, int groups, int axis) -> Tensor(out)
+  args : (Tensor x, Tensor out, Tensor out_grad, int groups, int axis)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : GeneralUnaryGradInferMeta
+    param: [x]
+  kernel :
+    func : maxout_grad
+
 - backward_api : mean_all_grad
   forward : mean_all(Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
@@ -998,6 +1008,18 @@
     func : modulo_grad
   no_need_buffer : x, y
 
+- backward_api : multi_dot_grad
+  forward : multi_dot (Tensor[] x) -> Tensor(out)
+  args : (Tensor[] x, Tensor out_grad)
+  output : Tensor[](x_grad)
+  invoke : multi_dot_grad_impl(x, out_grad)
+
+- backward_api : multiplex_grad
+  forward : multiplex (Tensor[] ins, Tensor ids) -> Tensor(out)
+  args : (Tensor[] ins, Tensor ids, Tensor out_grad)
+  output : Tensor[](ins_grad)
+  invoke : multiplex_grad_impl(ins, ids, out_grad)
+
 - backward_api : multiply_grad
   forward : multiply (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
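
Not part of the patch: the diff above only wires maxout, multi_dot, and multiplex into the final-state (eager) dispatch and adds their grad glue; it ships no standalone usage example. The sketch below is a minimal eager-mode smoke check under the assumption that a Paddle build already contains these changes and that paddle.multiplex re-exports the fluid layer touched above; shapes, dtypes, and the combined loss are illustrative only.

    import numpy as np
    import paddle
    import paddle.nn.functional as F
    from paddle.fluid.framework import _test_eager_guard

    with _test_eager_guard():  # route calls through the final_state_* eager ops
        # maxout -> final_state_maxout(x, groups, axis)
        x = paddle.rand([2, 6, 4, 4])
        x.stop_gradient = False
        y = F.maxout(x, groups=2, axis=1)

        # multi_dot -> final_state_multi_dot(Tensor[] x)
        a = paddle.rand([3, 4])
        b = paddle.rand([4, 5])
        a.stop_gradient = False
        b.stop_gradient = False
        out = paddle.linalg.multi_dot([a, b])

        # multiplex -> final_state_multiplex(Tensor[] ins, Tensor ids)
        img1 = paddle.to_tensor(np.array([[1., 2.], [3., 4.]], dtype=np.float32))
        img2 = paddle.to_tensor(np.array([[5., 6.], [7., 8.]], dtype=np.float32))
        img1.stop_gradient = False
        img2.stop_gradient = False
        index = paddle.to_tensor(np.array([[1], [0]], dtype=np.int32))
        res = paddle.multiplex([img1, img2], index)

        # backward exercises the new multi_dot_grad_impl / multiplex_grad_impl
        # custom APIs and the maxout_grad entry added to backward.yaml
        (y.sum() + out.sum() + res.sum()).backward()

Note that this relies on the reordered multi_dot_grad kernel signature (x before out_grad) introduced above, which is also why the compat mapping in multi_dot_sig.cc flips its argument order.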