diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h
index 24e2daaeb65bbdfd02aa663c3a8f9dee1cc32b58..1473cb1b5a2483eef5401fc5b8235e16a261fbf7 100644
--- a/paddle/phi/kernels/onednn/conv_handler.h
+++ b/paddle/phi/kernels/onednn/conv_handler.h
@@ -180,7 +180,9 @@ class ConvOneDNNHandlerT
         weights_md = funcs::OneDNNMemDesc(
             weights_tz, data_type, funcs::OneDNNMemoryFormat::any);
       }
-
+      if (input->dims().size() == 4 && input->dims()[1] == 3) {
+        chosen_memory_format = funcs::OneDNNMemoryFormat::nhwc;
+      }
       const auto dst_md = funcs::OneDNNMemDesc(
           dst_tz, funcs::OneDNNGetDataType<T_out>(), chosen_memory_format);
       const auto fwd_prop_kind = dnnl::prop_kind::forward_inference;
diff --git a/test/cpp/fluid/mkldnn/CMakeLists.txt b/test/cpp/fluid/mkldnn/CMakeLists.txt
index 8a04637e5d705886a6a2299bb1b7888992def6f7..3d5883dabfbf8971887405ff111393e56922d174 100644
--- a/test/cpp/fluid/mkldnn/CMakeLists.txt
+++ b/test/cpp/fluid/mkldnn/CMakeLists.txt
@@ -24,6 +24,19 @@ cc_test(
        scope
        device_context)
 
+# Smoke test for the oneDNN conv2d kernel on a 3-channel, 4-D input; the test
+# source defines an fp32 case and an int8 case.
+cc_test(test_conv_mkldnn_nhwc SRCS test_conv_mkldnn_nhwc.cc
+  DEPS executor
+       op_registry
+       depthwise_conv
+       tensor
+       phi
+       scope
+       device_context
+       enforce
+       generated_static_op)
+
 set(TEST_MKLDNN_CACHING_DEPS
     op_registry
     elementwise_mul_op
diff --git a/test/cpp/fluid/mkldnn/test_conv_mkldnn_nhwc.cc b/test/cpp/fluid/mkldnn/test_conv_mkldnn_nhwc.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ecc5ce726b2d8f66b29ab31f243d6f8c81ba499a
--- /dev/null
+++ b/test/cpp/fluid/mkldnn/test_conv_mkldnn_nhwc.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+#include <random>
+
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/naive_executor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+USE_OP_ITSELF(conv2d);
+PD_DECLARE_KERNEL(conv2d, OneDNN, ONEDNN);
+
+// Obtains variable `var_name` from `scope` as a phi::DenseTensor, allocates it
+// on the CPU with shape `dims`, and fills every element with a random value
+// drawn uniformly from [0, 100) and converted to DataType.
+template <typename DataType>
+void AddVarToScope(const std::string& var_name,
+                   paddle::framework::Scope* scope,
+                   const paddle::framework::DDim& dims) {
+  std::random_device seed;
+  std::default_random_engine engine(seed());
+  std::uniform_real_distribution<float> dist(0, 100);
+
+  auto* tensor = scope->Var(var_name)->GetMutable<phi::DenseTensor>();
+  auto* data =
+      tensor->mutable_data<DataType>(dims, paddle::platform::CPUPlace());
+  // Write straight into the tensor's CPU buffer; no staging copy is needed.
+  std::generate_n(data, tensor->numel(), [&] {
+    return static_cast<DataType>(dist(engine));
+  });
+}
+// Smoke test: fp32 conv2d with use_mkldnn=true on a 3-channel, 4-D input.
+// There are no output assertions; the test only exercises op creation and Run.
+TEST(test_conv2d_output, fp32) {
+  paddle::framework::Scope scope;
+  paddle::platform::CPUPlace cpu_place;
+
+  const std::vector<int> strides({1, 1});
+  const std::vector<int> paddings({1, 1});
+  const std::vector<int> dilations({1, 1});
+  const int groups = 1;
+
+  // Output extent: (224 + 2 * 1 - 7) / 1 + 1 = 220 for padding 1, stride 1.
+  AddVarToScope<float>("conv2d-X", &scope, {1, 3, 224, 224});
+  AddVarToScope<float>("conv2d-Y", &scope, {64, 3, 7, 7});
+  AddVarToScope<float>("conv2d-Out", &scope, {1, 64, 220, 220});
+
+  paddle::framework::OpDesc conv2d_op(nullptr);
+  conv2d_op.SetType("conv2d");
+  conv2d_op.SetInput("Input", {"conv2d-X"});
+  conv2d_op.SetInput("Filter", {"conv2d-Y"});
+  conv2d_op.SetOutput("Output", {"conv2d-Out"});
+  conv2d_op.SetAttr("strides", strides);
+  conv2d_op.SetAttr("paddings", paddings);
+  conv2d_op.SetAttr("dilations", dilations);
+  conv2d_op.SetAttr("groups", groups);
+  conv2d_op.SetAttr("use_mkldnn", true);
+
+  paddle::framework::OpRegistry::CreateOp(conv2d_op)->Run(scope, cpu_place);
+}
+// Smoke test: int8 conv2d (mkldnn_data_type "int8", force_fp32_output false)
+// with use_mkldnn=true on a 3-channel, 4-D input. No output assertions.
+TEST(test_conv2d_output, int8) {
+  paddle::framework::Scope scope;
+  paddle::platform::CPUPlace cpu_place;
+
+  const std::vector<int> strides({1, 1});
+  const std::vector<int> paddings({1, 1});
+  const std::vector<int> dilations({1, 1});
+  const int groups = 1;
+
+  // Output extent: (224 + 2 * 1 - 7) / 1 + 1 = 220 for padding 1, stride 1.
+  AddVarToScope<int8_t>("conv2d-X", &scope, {1, 3, 224, 224});
+  AddVarToScope<int8_t>("conv2d-Y", &scope, {64, 3, 7, 7});
+  AddVarToScope<int8_t>("conv2d-Out", &scope, {1, 64, 220, 220});
+
+  paddle::framework::OpDesc conv2d_op(nullptr);
+  conv2d_op.SetType("conv2d");
+  conv2d_op.SetInput("Input", {"conv2d-X"});
+  conv2d_op.SetInput("Filter", {"conv2d-Y"});
+  conv2d_op.SetOutput("Output", {"conv2d-Out"});
+  conv2d_op.SetAttr("strides", strides);
+  conv2d_op.SetAttr("paddings", paddings);
+  conv2d_op.SetAttr("dilations", dilations);
+  conv2d_op.SetAttr("groups", groups);
+  conv2d_op.SetAttr("use_mkldnn", true);
+  conv2d_op.SetAttr("mkldnn_data_type", std::string("int8"));
+  conv2d_op.SetAttr("force_fp32_output", false);
+
+  paddle::framework::OpRegistry::CreateOp(conv2d_op)->Run(scope, cpu_place);
+}