diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc index a9eed0d7eb0427e83c2eb1e7c6ed4a2d533778fe..33daeea8599c64c205f4587837f0271496aaa713 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc @@ -25,6 +25,31 @@ class ReduceMeanMKLDNNKernel : public ReduceMKLDNNKernel { } }; +template +class ReduceMeanGradMKLDNNKernel : public ReduceGradMKLDNNKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + const auto* input_x = ctx.Input("X"); + auto input_dims = framework::vectorize(input_x->dims()); + auto reduce_dims = ctx.Attr>("dim"); + + int number_of_elements = 1; + if (!ctx.Attr("reduce_all")) { + for (size_t i = 0; i < reduce_dims.size(); ++i) { + reduce_dims[i] = (reduce_dims[i] >= 0) + ? reduce_dims[i] + : input_dims.size() + reduce_dims[i]; + number_of_elements *= input_dims[reduce_dims[i]]; + } + } else { + number_of_elements = input_x->numel(); + } + + this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f, + 1.0L / number_of_elements); + } +}; + } // namespace operators } // namespace paddle @@ -32,3 +57,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(reduce_mean, MKLDNN, paddle::platform::CPUPlace, ops::ReduceMeanMKLDNNKernel, ops::ReduceMeanMKLDNNKernel); + +REGISTER_OP_KERNEL(reduce_mean_grad, MKLDNN, paddle::platform::CPUPlace, + ops::ReduceMeanGradMKLDNNKernel, + ops::ReduceMeanGradMKLDNNKernel); diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h index 7e09aaa126effe73bf4389c94542018dc200fe45..58416f479c04354f24ad113d6a69e84fedae6b07 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h @@ -121,5 +121,65 @@ class ReduceMKLDNNKernel : public framework::OpKernel { } }; +template +class ReduceGradMKLDNNKernel : public framework::OpKernel { + public: + void RunKernel(const framework::ExecutionContext& ctx, + dnnl::algorithm binary_type, float scale_x, + float scale_y) const { + const auto& dev_ctx = + ctx.template device_context(); + const auto& onednn_engine = dev_ctx.GetEngine(); + + auto dims = ctx.Attr>("dim"); + auto* input_dy = ctx.Input(framework::GradVarName("Out")); + auto* output_dx = ctx.Output(framework::GradVarName("X")); + + output_dx->mutable_data(ctx.GetPlace()); + output_dx->set_format(getPlainFormatTag(output_dx)); + output_dx->set_layout(input_dy->layout()); + + platform::BroadcastDataMKLDNNHandler handler( + binary_type, dev_ctx, onednn_engine, ctx.GetPlace(), output_dx, + input_dy, scale_x, scale_y, + ctx.InputName(framework::GradVarName("Out"))); + + const auto src_dx_memory = handler.AcquireSrcMemory(output_dx); + const auto src_dy_memory = handler.AcquireSecondSrcMemory(input_dy); + const auto binary_prim = handler.AcquireForwardPrimitive(); + + const std::unordered_map args = { + {DNNL_ARG_SRC_0, *src_dx_memory}, + {DNNL_ARG_SRC_1, *src_dy_memory}, + {DNNL_ARG_DST, *src_dx_memory}}; + + auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + binary_prim->execute(astream, args); + astream.wait(); + } + + protected: + mkldnn::memory::format_tag getPlainFormatTag(const Tensor* tensor) const { + auto tensor_dims_size = tensor->dims().size(); + PADDLE_ENFORCE_EQ( + tensor_dims_size <= 5 && tensor_dims_size >= 1, true, + platform::errors::InvalidArgument( + "Dims for reduction_grad oneDNN op must be in range <1, 5>")); + + switch (tensor_dims_size) { + case 1: + return mkldnn::memory::format_tag::a; + case 2: + return mkldnn::memory::format_tag::ab; + case 3: + return mkldnn::memory::format_tag::abc; + case 4: + return mkldnn::memory::format_tag::abcd; + } + + return mkldnn::memory::format_tag::abcde; + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc index 4676589e68910a7845a57c84ed4af2283c42328f..e62edcf559677e3cef3582b46dd0cdbc01b82e30 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc @@ -25,6 +25,14 @@ class ReduceSumMKLDNNKernel : public ReduceMKLDNNKernel { } }; +template +class ReduceSumGradMKLDNNKernel : public ReduceGradMKLDNNKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + this->RunKernel(ctx, dnnl::algorithm::binary_add, 0.0f, 1.0f); + } +}; + } // namespace operators } // namespace paddle @@ -32,3 +40,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(reduce_sum, MKLDNN, paddle::platform::CPUPlace, ops::ReduceSumMKLDNNKernel, ops::ReduceSumMKLDNNKernel); + +REGISTER_OP_KERNEL(reduce_sum_grad, MKLDNN, paddle::platform::CPUPlace, + ops::ReduceSumGradMKLDNNKernel, + ops::ReduceSumGradMKLDNNKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 280464ea85279319c82551163c461a5ce0c4c3a7..913d941df8810bc2906f305b6239444d1280a4ae 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -559,15 +559,44 @@ class ReduceGradOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { + auto input_data_type = OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")); + +#ifdef PADDLE_WITH_MKLDNN + auto CanMKLDNNReduceGradBeUsed = [&]() { + auto dx_dims = ctx.Input("X")->dims(); + + if (dx_dims.size() > 5) return false; // max 5D tensor is supported + + if (ctx.Attr("reduce_all") || + ((int)ctx.Attr>("dim").size() == dx_dims.size())) + return true; + + auto dy_dims = ctx.Input(framework::GradVarName("Out"))->dims(); + + // Subtensor must be on rightmost part of the bigger tensor + for (int i = 0; i < dy_dims.size(); ++i) { + if (dx_dims[dx_dims.size() - dy_dims.size() + i] != dy_dims[i]) { + return false; + } + } + return true; + }; + if (this->CanMKLDNNBeUsed(ctx, input_data_type) && + CanMKLDNNReduceGradBeUsed()) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + int in_dtype = ctx.Attr("in_dtype"); if (in_dtype >= 0) { return framework::OpKernelType( static_cast(in_dtype), ctx.GetPlace()); } - return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.GetPlace()); + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 0c45da63edd70ed26e427b6faec070e5292f283e..54efa55cc4cd9da7d5a0b868093adee74b4fe002 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -630,6 +630,78 @@ class BinaryMKLDNNHandler : public platform::MKLDNNHandlerT { } }; +template +class BroadcastDataMKLDNNHandler + : public platform::MKLDNNHandlerT { + public: + BroadcastDataMKLDNNHandler(const dnnl::algorithm algo, + const MKLDNNDeviceContext& dev_ctx, + const mkldnn::engine engine, + platform::Place cpu_place, const Tensor* x, + const Tensor* y, float scale_x, float scale_y, + const std::string& uniq_name) + : platform::MKLDNNHandlerT( + dev_ctx, engine, cpu_place, + platform::CreateKey(dev_ctx, framework::vectorize(x->dims()), + uniq_name)) { + if (!this->isCached()) { + PADDLE_ENFORCE_EQ( + x->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument("Wrong layout set for X tensor.")); + PADDLE_ENFORCE_NE( + x->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument("Wrong format set for X tensor.")); + + PADDLE_ENFORCE_EQ( + y->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument("Wrong layout set for Y tensor.")); + PADDLE_ENFORCE_NE( + y->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument("Wrong format set for Y tensor.")); + + auto src1_tz = framework::vectorize(y->dims()); + const auto src0_tz = framework::vectorize(x->dims()); + + // GetExpectedKernelType checks if smaller vector is a subvector with all + // the dims in correct order on the rightmost part of the bigger vector, + // i.e. a correct vector for broadcasting: + // x = 5, 7, 3, 2, 4, 8 + // y = 4, 8 + src1_tz.reserve(src0_tz.size()); + + for (size_t i = src1_tz.size(); i < src0_tz.size(); ++i) { + src1_tz.insert(src1_tz.begin(), 1L); + } + + const auto src0_md = dnnl::memory::desc( + src0_tz, platform::MKLDNNGetDataType(), x->format()); + const auto src1_md = dnnl::memory::desc( + src1_tz, platform::MKLDNNGetDataType(), x->format()); + + dnnl::primitive_attr attributes; + attributes.set_scales(DNNL_ARG_SRC_0, 0, {scale_x}); + attributes.set_scales(DNNL_ARG_SRC_1, 0, {scale_y}); + + this->AcquireForwardPrimitiveDescriptor(attributes, algo, src0_md, + src1_md, src0_md); + } + } + + std::shared_ptr AcquireSrcMemory(framework::Tensor* input) { + T* input_data = input->data(); + memset(input_data, 0, this->fwd_pd_->src_desc().get_size()); + return this->AcquireMemoryFromPrimitive( + this->fwd_pd_->src_desc(), to_void_cast(input_data), "@src0_mem_p"); + } + + std::shared_ptr AcquireSecondSrcMemory( + const framework::Tensor* input) { + const T* input_data = input->data(); + return this->AcquireMemoryFromPrimitive( + this->fwd_pd_->src1_desc(), to_void_cast(input_data), "@src1_mem_p"); + } +}; + template class ReductionMKLDNNHandler : public platform::MKLDNNHandlerT { diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py index a894d042e426c0f224d3fe13a5ded10c44cddbe5..1d7ab4f6b336993f84d4932bd0da7b433dbb6b2c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_bf16_mkldnn_op.py @@ -26,158 +26,182 @@ import paddle "place does not support BF16 evaluation") @unittest.skipIf(core.is_compiled_with_cuda(), "core is compiled with CUDA which has no BF implementation") -@skip_check_grad_ci(reason="not implemented") -class TestReduceSumDefaultBF16ONEDNNOp(OpTest): +class TestReduceSumDefaultBF16OneDNNOp(OpTest): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True - x_fp32 = np.random.random((5, 6, 10)).astype("float32") - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} - self.outputs = {'Out': x_fp32.sum(axis=0)} + self.x_fp32 = np.random.random((5, 6, 10)).astype("float32") + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} + self.outputs = {'Out': self.x_fp32.sum(axis=0)} self.attrs = {'use_mkldnn': self.use_mkldnn} def test_check_output(self): self.check_output(check_dygraph=False) + def calculate_grads(self): + tmp_tensor = np.zeros(self.x_fp32.shape).astype("float32") -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DBF16ONEDNNOp(TestReduceSumDefaultBF16ONEDNNOp): - def setUp(self): - self.op_type = "reduce_sum" - self.use_mkldnn = True - x_fp32 = np.random.random((5, 10, 5, 5)).astype("float32") - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} - self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [2]} - self.outputs = {'Out': x_fp32.sum(axis=tuple(self.attrs['dim']))} + prod_of_reduced_dims = self.inputs['X'].shape[0] + axis = 0 + if "dim" in self.attrs: + prod_of_reduced_dims = 1 + axis = tuple(self.attrs['dim']) + for i in range(len(axis)): + ax = axis[i] + if axis[i] < 0: + ax = len(axis) + axis[i] + prod_of_reduced_dims *= self.inputs['X'].shape[ax] -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DReduceAllWithoutReduceAllAttributeBF16ONEDNNOp( - TestReduceSumDefaultBF16ONEDNNOp): - def setUp(self): - self.op_type = "reduce_sum" - self.use_mkldnn = True - x_fp32 = np.random.normal(size=(2, 3, 5, 6)).astype('float32') - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} - self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1, 2, 3]} - self.outputs = {'Out': x_fp32.sum(axis=tuple(self.attrs['dim']))} + if 'reduce_all' in self.attrs: + if self.attrs['reduce_all'] is True: + axis = None + prod_of_reduced_dims = np.asarray(self.inputs['X'].shape).prod() + + keepdim = False + if 'keep_dim' in self.attrs: + keepdim = True + + self.grad_Out = self.x_fp32.sum(axis=axis, keepdims=keepdim) + self.grad_Out = np.atleast_1d(self.grad_Out) + self.grad_X = tmp_tensor + self.grad_Out # broadcast grad + + if self.op_type == 'reduce_mean': + self.grad_X /= prod_of_reduced_dims + + +class TestReduceDefaultWithGradBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp): + def test_check_grad(self): + self.calculate_grads() + self.check_grad_with_place( + core.CPUPlace(), ["X"], + "Out", + check_dygraph=False, + user_defined_grads=[self.grad_X], + user_defined_grad_outputs=[convert_float_to_uint16(self.grad_Out)]) -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DReduceAllWithoutReduceAllAttributeNegativeDimsBF16ONEDNNOp( - TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp( + TestReduceDefaultWithGradBF16OneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True - x_fp32 = np.random.normal(size=(2, 7, 3, 5)).astype('float32') - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} - self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [-1, -2, -3, -4]} - self.outputs = {'Out': x_fp32.sum(axis=tuple(self.attrs['dim']))} + self.x_fp32 = np.random.normal(size=(2, 3, 5, 6)).astype('float32') + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} + self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1, 2, 3]} + self.outputs = {'Out': self.x_fp32.sum(axis=tuple(self.attrs['dim']))} -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum5DKeepDimsONEDNNOp(TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceSum4DReduceAllWithoutReduceAllAttributeNegativeDimsBF16OneDNNOp( + TestReduceDefaultWithGradBF16OneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True - x_fp32 = np.random.random((2, 5, 3, 2, 2)).astype("float32") - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} - self.attrs = {'dim': (2, 3, 4), 'keep_dim': True, 'use_mkldnn': True} - self.outputs = { - 'Out': x_fp32.sum(axis=tuple(self.attrs['dim']), - keepdims=self.attrs['keep_dim']) - } + self.x_fp32 = np.random.normal(size=(4, 7, 6, 6)).astype('float32') + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} + self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [-1, -2, -3, -4]} + self.outputs = {'Out': self.x_fp32.sum(axis=tuple(self.attrs['dim']))} -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum5DReduceAllKeepDimsBF16ONEDNNOp( - TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceSum5DReduceAllKeepDimsBF16OneDNNOp( + TestReduceDefaultWithGradBF16OneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True - x_fp32 = np.random.normal(size=(2, 5, 3, 2, 4)).astype('float32') - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} + self.x_fp32 = np.random.normal(size=(2, 5, 3, 2, 5)).astype('float32') + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} self.attrs = {'reduce_all': True, 'keep_dim': True, 'use_mkldnn': True} - self.outputs = {'Out': x_fp32.sum(keepdims=self.attrs['keep_dim'])} + self.outputs = {'Out': self.x_fp32.sum(keepdims=self.attrs['keep_dim'])} -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DReduceAllBF16ONEDNNOp(TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceSum4DReduceAllBF16OneDNNOp( + TestReduceDefaultWithGradBF16OneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True - x_fp32 = np.random.normal(size=(4, 3, 2, 3)).astype('float32') - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} + self.x_fp32 = np.random.normal(size=(4, 5, 4, 5)).astype('float32') + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} self.attrs = {'reduce_all': True, 'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': x_fp32.sum()} + self.outputs = {'Out': self.x_fp32.sum()} @skip_check_grad_ci( reason="reduce_max is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") -class TestReduceMax3DBF16ONEDNNOp(TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceMax3DBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp): """Remove Max with subgradient from gradient check to confirm the success of CI.""" def setUp(self): self.op_type = "reduce_max" self.use_mkldnn = True - x_fp32 = np.random.random((5, 6, 10)).astype("float32") - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} + self.x_fp32 = np.random.random((5, 6, 10)).astype("float32") + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} self.attrs = {'dim': [-1], 'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': x_fp32.max(axis=tuple(self.attrs['dim']))} + self.outputs = {'Out': self.x_fp32.max(axis=tuple(self.attrs['dim']))} @skip_check_grad_ci( reason="reduce_max is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") -class TestReduceMax4DNegativeAndPositiveDimsBF16ONEDNNOp( - TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceMax4DNegativeAndPositiveDimsBF16OneDNNOp( + TestReduceSumDefaultBF16OneDNNOp): """Remove Max with subgradient from gradient check to confirm the success of CI.""" def setUp(self): self.op_type = "reduce_max" self.use_mkldnn = True - x_fp32 = np.random.random((5, 6, 10, 9)).astype("float32") - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} + self.x_fp32 = np.random.random((5, 6, 10, 9)).astype("float32") + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} self.attrs = {'dim': [-1, 0, 1], 'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': x_fp32.max(axis=tuple(self.attrs['dim']))} + self.outputs = {'Out': self.x_fp32.max(axis=tuple(self.attrs['dim']))} @skip_check_grad_ci( reason="reduce_min is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") -class TestReduceMin3DBF16ONEDNNOp(TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceMin3DBF16OneDNNOp(TestReduceSumDefaultBF16OneDNNOp): """Remove Min with subgradient from gradient check to confirm the success of CI.""" def setUp(self): self.op_type = "reduce_min" self.use_mkldnn = True - x_fp32 = np.random.random((5, 6, 10)).astype("float32") - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} + self.x_fp32 = np.random.random((5, 6, 10)).astype("float32") + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} self.attrs = {'dim': [2], 'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': x_fp32.min(axis=tuple(self.attrs['dim']))} + self.outputs = {'Out': self.x_fp32.min(axis=tuple(self.attrs['dim']))} -@skip_check_grad_ci(reason="not implemented") -class TestReduceMean3DBF16ONEDNNOp(TestReduceSumDefaultBF16ONEDNNOp): +class TestReduceMean3DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp): def setUp(self): self.op_type = "reduce_mean" self.use_mkldnn = True - x_fp32 = np.random.random((5, 6, 10)).astype("float32") - x_bf16 = convert_float_to_uint16(x_fp32) - self.inputs = {'X': x_bf16} + self.x_fp32 = np.random.random((5, 6, 10)).astype("float32") + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} self.attrs = {'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': x_fp32.sum(axis=0) / x_fp32.shape[0]} + self.outputs = {'Out': self.x_fp32.sum(axis=0) / self.x_fp32.shape[0]} + + +class TestReduceMean4DBF16OneDNNOp(TestReduceDefaultWithGradBF16OneDNNOp): + def setUp(self): + self.op_type = "reduce_mean" + self.use_mkldnn = True + self.x_fp32 = np.random.random((5, 6, 3, 5)).astype("float32") + self.x_bf16 = convert_float_to_uint16(self.x_fp32) + self.inputs = {'X': self.x_bf16} + self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1]} + self.outputs = { + 'Out': self.x_fp32.sum(axis=tuple(self.attrs['dim'])) / + (self.x_fp32.shape[0] * self.x_fp32.shape[1]) + } if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py index c913b9eeea27df8757f7b4dba8e6c49bff4c9a85..46ee2a14a2018549105b41b574344c636dcf6dce 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reduce_mkldnn_op.py @@ -19,8 +19,7 @@ import paddle.fluid as fluid import paddle -@skip_check_grad_ci(reason="not implemented") -class TestReduceSumDefaultONEDNNOp(OpTest): +class TestReduceSumDefaultOneDNNOp(OpTest): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -32,46 +31,35 @@ class TestReduceSumDefaultONEDNNOp(OpTest): self.check_output() -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DONEDNNOp(TestReduceSumDefaultONEDNNOp): - def setUp(self): - self.op_type = "reduce_sum" - self.use_mkldnn = True - self.inputs = {'X': np.random.random((5, 10, 5, 5)).astype("float32")} - self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [2]} - self.outputs = { - 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim'])) - } +class TestReduceDefaultWithGradOneDNNOp(TestReduceSumDefaultOneDNNOp): + def test_check_grad(self): + self.check_grad(['X'], 'Out') -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DReduceAllWithoutReduceAllAttributeONEDNNOp( - TestReduceSumDefaultONEDNNOp): +class TestReduceSum4DOneDNNOp(TestReduceDefaultWithGradOneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True self.inputs = {'X': np.random.random((5, 10, 5, 5)).astype("float32")} - self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1, 2, 3]} + self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [2]} self.outputs = { 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim'])) } -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DReduceAllWithoutReduceAllAttributeNegativeDimsONEDNNOp( - TestReduceSumDefaultONEDNNOp): +class TestReduceSum4DReduceAllDimAttributeBF16OneDNNOp( + TestReduceDefaultWithGradOneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True - self.inputs = {'X': np.random.random((5, 10, 5, 5)).astype("float32")} - self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [-1, -2, -3, -4]} + self.inputs = {'X': np.random.random((5, 10, 5, 3)).astype("float32")} + self.attrs = {'use_mkldnn': self.use_mkldnn, 'dim': [0, 1, 2, 3]} self.outputs = { 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim'])) } -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum5DKeepDimsONEDNNOp(TestReduceSumDefaultONEDNNOp): +class TestReduceSum5DKeepDimsOneDNNOp(TestReduceDefaultWithGradOneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -83,8 +71,8 @@ class TestReduceSum5DKeepDimsONEDNNOp(TestReduceSumDefaultONEDNNOp): } -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum5DReduceAllKeepDimsONEDNNOp(TestReduceSumDefaultONEDNNOp): +class TestReduceSum5DReduceAllKeepDimsOneDNNOp( + TestReduceDefaultWithGradOneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -95,8 +83,7 @@ class TestReduceSum5DReduceAllKeepDimsONEDNNOp(TestReduceSumDefaultONEDNNOp): } -@skip_check_grad_ci(reason="not implemented") -class TestReduceSum4DReduceAllONEDNNOp(TestReduceSumDefaultONEDNNOp): +class TestReduceSum4DReduceAllOneDNNOp(TestReduceDefaultWithGradOneDNNOp): def setUp(self): self.op_type = "reduce_sum" self.use_mkldnn = True @@ -108,7 +95,7 @@ class TestReduceSum4DReduceAllONEDNNOp(TestReduceSumDefaultONEDNNOp): @skip_check_grad_ci( reason="reduce_max is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") -class TestReduceMax3DONEDNNOp(TestReduceSumDefaultONEDNNOp): +class TestReduceMax3DOneDNNOp(TestReduceSumDefaultOneDNNOp): """Remove Max with subgradient from gradient check to confirm the success of CI.""" def setUp(self): @@ -124,8 +111,8 @@ class TestReduceMax3DONEDNNOp(TestReduceSumDefaultONEDNNOp): @skip_check_grad_ci( reason="reduce_max is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") -class TestReduceMax4DNegativeAndPositiveDimsONEDNNOp( - TestReduceSumDefaultONEDNNOp): +class TestReduceMax4DNegativeAndPositiveDimsOneDNNOp( + TestReduceSumDefaultOneDNNOp): """Remove Max with subgradient from gradient check to confirm the success of CI.""" def setUp(self): @@ -141,7 +128,7 @@ class TestReduceMax4DNegativeAndPositiveDimsONEDNNOp( @skip_check_grad_ci( reason="reduce_min is discontinuous non-derivable function," " its gradient check is not supported by unittest framework.") -class TestReduceMin3DONEDNNOp(TestReduceSumDefaultONEDNNOp): +class TestReduceMin3DOneDNNOp(TestReduceSumDefaultOneDNNOp): """Remove Min with subgradient from gradient check to confirm the success of CI.""" def setUp(self): @@ -154,8 +141,7 @@ class TestReduceMin3DONEDNNOp(TestReduceSumDefaultONEDNNOp): } -@skip_check_grad_ci(reason="not implemented") -class TestReduceMean3DONEDNNOp(TestReduceSumDefaultONEDNNOp): +class TestReduceMean3DOneDNNOp(TestReduceDefaultWithGradOneDNNOp): def setUp(self): self.op_type = "reduce_mean" self.use_mkldnn = True @@ -166,8 +152,7 @@ class TestReduceMean3DONEDNNOp(TestReduceSumDefaultONEDNNOp): } -@skip_check_grad_ci(reason="not implemented") -class TestReduceMean4DReduceAllONEDNNOp(TestReduceSumDefaultONEDNNOp): +class TestReduceMean4DReduceAllOneDNNOp(TestReduceDefaultWithGradOneDNNOp): def setUp(self): self.op_type = "reduce_mean" self.use_mkldnn = True @@ -179,16 +164,6 @@ class TestReduceMean4DReduceAllONEDNNOp(TestReduceSumDefaultONEDNNOp): } -@skip_check_grad_ci(reason="not implemented") -class TestReduceMeanNoReduce1DOp(TestReduceSumDefaultONEDNNOp): - def setUp(self): - self.op_type = "reduce_mean" - self.use_mkldnn = True - self.inputs = {'X': np.random.random((1)).astype("float32")} - self.attrs = {'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': self.inputs['X']} - - if __name__ == '__main__': paddle.enable_static() unittest.main()