diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h
index d745afca56e30869f4bdf85a067931dc0febcb59..d8c3bb03b3aeac876c61a63ac648ac7f3d56fb23 100644
--- a/paddle/function/ConvOpTest.h
+++ b/paddle/function/ConvOpTest.h
@@ -80,6 +80,12 @@ void Convolution(const std::string& conv1,
             for (size_t stride : {1, 2}) {
               for (size_t padding : {0, 1}) {
                 if (padding >= filterSize) break;
+
+                // NNPACK only supports stride = 1 if batchSize > 1
+                if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
+                    batchSize > 1 && stride > 1)
+                  break;
+
                 size_t outputSize =
                     (inputSize - filterSize + 2 * padding + stride) / stride;
                 VLOG(3) << " batchSize=" << batchSize
@@ -102,7 +108,7 @@ void Convolution(const std::string& conv1,
                         .set("paddings", paddings)
                         .set("strides", strides)
                         .set("groups", (size_t)1)
-                        .set("algo", "auto"));
+                        .set("algo", (std::string) "auto"));
 
                 TensorShape input{
                     batchSize, inputChannels, inputSize, inputSize};
@@ -163,7 +169,7 @@ void Convolution2(const std::string& conv1,
                         .set("paddings", paddings)
                         .set("strides", strides)
                         .set("groups", (size_t)1)
-                        .set("algo", "auto"));
+                        .set("algo", (std::string) "auto"));
 
                 TensorShape input{
                     batchSize, inputChannels, inputHeight, inputWidth};
@@ -196,6 +202,11 @@ void DepthwiseConvolution(const std::string& conv1,
           for (size_t outputChannels : {32, 64}) {
             for (size_t stride : {1, 2}) {
               for (size_t padding : {0, 1}) {
+                // NNPACK only supports stride = 1 if batchSize > 1
+                if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
+                    batchSize > 1 && stride > 1)
+                  break;
+
                 size_t outputSize =
                     (inputSize - filterSize + 2 * padding + stride) / stride;
                 VLOG(3) << " batchSize=" << batchSize
@@ -219,7 +230,7 @@ void DepthwiseConvolution(const std::string& conv1,
                         .set("paddings", paddings)
                         .set("strides", strides)
                         .set("groups", groups)
-                        .set("algo", "auto"));
+                        .set("algo", (std::string) "auto"));
 
                 TensorShape input{
                     batchSize, inputChannels, inputSize, inputSize};
diff --git a/paddle/function/nnpack/NNPACKConvOpTest.cpp b/paddle/function/nnpack/NNPACKConvOpTest.cpp
index 48180112111c67f36ddd425008187201655089c9..4dd3982487f3567f461ddaea8c5dc719fff04736 100644
--- a/paddle/function/nnpack/NNPACKConvOpTest.cpp
+++ b/paddle/function/nnpack/NNPACKConvOpTest.cpp
@@ -13,87 +13,18 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <gtest/gtest.h>
-#include "paddle/function/Function.h"
-#include "paddle/function/FunctionTest.h"
-
-DEFINE_string(algo,
-              "auto",
-              "The algorithm (auto, ft8x8, ft16x16, wt8x8, "
-              "implicit-gemm, or direct) for computing convolution of NNPACK.");
+#include "paddle/function/ConvOpTest.h"
 
 namespace paddle {
 
-#define IS_NNPACK_SUPPORT(algo, filterSize, stride)        \
-  if (algo == "direct" && filterSize != 1) continue;       \
-  if (algo == "direct" && batchSize != 1) continue;        \
-  if (algo == "wt8x8" && filterSize != 3) continue;        \
-  if (algo == "implicit-gemm" && batchSize != 1) continue; \
-  if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue;
-
-class ConvolutionTest {
-public:
-  ConvolutionTest(const std::string& conv1,
-                  const std::string& conv2,
-                  std::string algo = "auto") {
-    for (size_t batchSize : {1, 32}) {
-      for (size_t inputSize : {7, 14, 54}) {
-        for (size_t filterSize : {1, 3, 5}) {
-          for (size_t inputChannels : {3, 64}) {
-            for (size_t outputChannels : {3, 64, 128}) {
-              if (inputChannels < outputChannels) break;
-              for (size_t stride : {1, 2}) {
-                // if batchSize > 1 NNPACKConv only supports stride = 1
-                if (batchSize > 1 && stride > 1) break;
-                for (size_t padding : {0, 1}) {
-                  if (padding >= filterSize) break;
-                  size_t outputSize =
-                      (inputSize - filterSize + 2 * padding + stride) / stride;
-                  IS_NNPACK_SUPPORT(algo, filterSize, stride);
-                  LOG(INFO) << " batchSize=" << batchSize
-                            << " inputChannels=" << inputChannels
-                            << " inputHeight=" << inputSize
-                            << " inputWidth=" << inputSize
-                            << " outputChannels=" << outputChannels
-                            << " filterHeight=" << filterSize
-                            << " filterWidth=" << filterSize
-                            << " outputHeight=" << outputSize
-                            << " outputWidth=" << outputSize
-                            << " stride=" << stride << " padding=" << padding;
-
-                  std::vector<size_t> paddings = {padding, padding};
-                  std::vector<size_t> strides = {stride, stride};
-                  Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
-                      conv1,
-                      conv2,
-                      FuncConfig()
-                          .set("paddings", paddings)
-                          .set("strides", strides)
-                          .set("groups", (size_t)1)
-                          .set("algo", algo));
-
-                  TensorShape shape0{
-                      batchSize, inputChannels, inputSize, inputSize};
-                  TensorShape shape1{
-                      outputChannels, inputChannels, filterSize, filterSize};
-                  TensorShape shape2{
-                      batchSize, outputChannels, outputSize, outputSize};
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0));
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1));
-                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2));
-                  test.run();
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-};
+TEST(NNPACK, Forward) {  // ConvOpTest.h sweep; stride 1 only if batchSize > 1
+  Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU>(
+      "GemmConv-CPU", "NNPACKConv-CPU", forward);
+}
 
-TEST(Convolution, NNPACK) {
-  // NNPACK only supports stride = 1
-  ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo);
+TEST(NNPACK, Depthwise) {  // ConvOpTest.h sweep; stride 1 only if batchSize > 1
+  DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU>(
+      "GemmConv-CPU", "NNPACKConv-CPU", forward);
 }
 
 }  // namespace paddle