diff --git a/paddle/fluid/operators/pool_cudnn_op.cu.cc b/paddle/fluid/operators/pool_cudnn_op.cu.cc index 8ceb22d8cc4c33babb19fa63571a9fd1793c4355..1bdb3728f538e2fab1851234eb277ceead8c60d5 100644 --- a/paddle/fluid/operators/pool_cudnn_op.cu.cc +++ b/paddle/fluid/operators/pool_cudnn_op.cu.cc @@ -20,6 +20,8 @@ limitations under the License. */ #include "paddle/fluid/platform/cudnn_helper.h" #endif #ifdef PADDLE_WITH_HIP +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/miopen_helper.h" #endif @@ -264,6 +266,34 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { std::string padding_algorithm = ctx.Attr("padding_algorithm"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); +#ifdef PADDLE_WITH_HIP + if (pooling_type == "max") { + using OpKernelMap = paddle::framework::OperatorWithKernel::OpKernelMap; + using OpKernelFunc = paddle::framework::OperatorWithKernel::OpKernelFunc; + auto &all_op_kernels = + paddle::framework::OperatorWithKernel::AllOpKernels(); + std::string op_type = "pool2d_grad"; + auto kernels_iter = all_op_kernels.find(op_type); + PADDLE_ENFORCE_NE( + kernels_iter, all_op_kernels.end(), + platform::errors::Unavailable( + "There are no kernels which are registered in the %s operator.", + op_type)); + OpKernelMap &kernels = kernels_iter->second; + paddle::framework::OpKernelType expected_kernel_key( + paddle::framework::ToDataType(typeid(T)), ctx.GetPlace()); + auto kernel_iter = kernels.find(expected_kernel_key); + PADDLE_ENFORCE_NE(kernel_iter, kernels.end(), + platform::errors::NotFound( + "Operator (%s) does not have kernel for %s.", + op_type, KernelTypeToString(expected_kernel_key))); + std::unique_ptr kernel_func_( + new OpKernelFunc(kernel_iter->second)); + (*kernel_func_)(ctx); + return; + } +#endif + // update paddings auto in_x_dims = input->dims(); framework::DDim data_dims; diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 6decff69ad65cfa4c51468d6892e65bda1386028..ce728f1121dfdbc04dc123c3976539ec143fc9d6 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -174,6 +174,11 @@ class Conv2D(layers.Layer): dtype='float32'): assert param_attr is not False, "param_attr should not be False here." super(Conv2D, self).__init__() + + if (core.is_compiled_with_cuda() and paddle.fluid.get_flags( + "FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]): + use_cudnn = False + self._num_channels = num_channels self._groups = groups self._stride = utils.convert_to_list(stride, 2, 'stride') diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_op.py index 83bba0b0ca1c3af17d25525816e4cbf8cf0c3338..bbb0f5b10393d0faf62bd931854e1522c213d419 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py @@ -1470,35 +1470,59 @@ class TestConv2DAPI_Error(unittest.TestCase): not (core.is_compiled_with_cuda() or core.is_compiled_with_rocm()), "core is not compiled with CUDA or ROCM") class TestConv2DEnviron(unittest.TestCase): - def run_conv2d_api(self): - inputs = fluid.layers.data( - shape=[2, 3, 5, 5], - append_batch_size=False, - name="inputs", - dtype="float32") - fluid.layers.conv2d( - input=inputs, - num_filters=4, - filter_size=[3, 3], - stride=[1, 1], - padding=0, - dilation=[1, 1], - groups=1, - data_format="NCHW") - - x_var = paddle.uniform((2, 3, 5, 5), dtype="float32", min=-1., max=1.) - conv = paddle.nn.Conv2D( - in_channels=3, - out_channels=4, - kernel_size=(3, 3), - data_format="NCHW") - y_var = conv(x_var) + def run1(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + inputs = fluid.layers.data( + shape=[2, 3, 5, 5], + append_batch_size=False, + name="inputs", + dtype="float32") + result = fluid.layers.conv2d( + input=inputs, + num_filters=4, + filter_size=[3, 3], + stride=[1, 1], + padding=0, + dilation=[1, 1], + groups=1, + data_format="NCHW") + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + fetches = exe.run(fluid.default_main_program(), + feed={"inputs": self.input_np}, + fetch_list=[result]) + + def run2(self, place): + with fluid.dygraph.guard(place): + inputs = fluid.dygraph.to_variable(self.input_np) + conv = paddle.nn.Conv2D( + in_channels=3, + out_channels=4, + kernel_size=(3, 3), + data_format="NCHW") + result = conv(inputs) + + def run3(self, place): + with fluid.dygraph.guard(place): + inputs = fluid.dygraph.to_variable(self.input_np) + conv = paddle.fluid.dygraph.nn.Conv2D( + num_channels=3, + num_filters=4, + filter_size=(3, 3), ) + result = conv(inputs) + + def run_all(self, place): + self.run1(place) + self.run2(place) + self.run3(place) def test_environ(self): - fluid.set_flags({'FLAGS_conv2d_disable_cudnn': False}) - self.run_conv2d_api() - fluid.set_flags({'FLAGS_conv2d_disable_cudnn': True}) - self.run_conv2d_api() + self.input_np = np.random.random([2, 3, 5, 5]).astype("float32") + for place in [paddle.CPUPlace(), paddle.CUDAPlace(0)]: + fluid.set_flags({'FLAGS_conv2d_disable_cudnn': False}) + self.run_all(place) + fluid.set_flags({'FLAGS_conv2d_disable_cudnn': True}) + self.run_all(place) if __name__ == '__main__':