From 02e04b44411a851a86217815e7d740c634d8324f Mon Sep 17 00:00:00 2001
From: xzl <zlx_hg@163.com>
Date: Tue, 18 Jul 2017 22:04:53 +0800
Subject: [PATCH] fuse the conv and depthwise conv together

---
 paddle/function/ConvOpTest.cpp | 281 ++++++++++++---------------------
 1 file changed, 104 insertions(+), 177 deletions(-)

// Summary: this patch removes DepthwiseConvolutionTest and
// DepthwiseConvolutionTest2 and routes the depthwise test cases through
// ConvolutionTest / ConvolutionTest2 instead. A new LayerType argument selects
// the "groups" setting (1 vs. inputChannels) and the filter TensorShape
// (4-D {out, in, h, w} vs. 5-D {groups, out / groups, 1, h, w}).
//
// NOTE(review): observations for follow-up; the hunks below are unchanged.
// - ConvolutionTest: `if (inputChannels > outputChannels) break;` leaves the
//   outputChannels loop. inputChannels iterates {3, 64} and outputChannels
//   iterates {3, 64, 128} in that order, so for inputChannels = 64 the first
//   candidate (3) already triggers the break and no case runs for that input
//   size. `continue` looks like the intent -- TODO confirm.
// - The depthwise guard `outputChannels % inputChannels != 0` also uses
//   `break`. With inputChannels = 3 it stops at outputChannels = 64, so the
//   depthwise path through ConvolutionTest only reaches 3 -> 3 channels; in
//   ConvolutionTest2 (inputChannels {7}, outputChannels {7, 32}) it only
//   reaches 7 -> 7.
// - Depthwise TESTs: several instantiations use
//   <DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> although both function names end in
//   "-GPU", and the backward tests pass the same function name for both
//   sides. This is carried over from the removed DepthwiseConvolutionTest*
//   usages; presumably worth checking against what Compare2Function expects
//   -- TODO confirm.

diff --git a/paddle/function/ConvOpTest.cpp b/paddle/function/ConvOpTest.cpp
index 61f0c18bed4..27609fbbd44 100644
--- a/paddle/function/ConvOpTest.cpp
+++ b/paddle/function/ConvOpTest.cpp
@@ -25,11 +25,17 @@ enum TestType {
   kBackwardFilterTest = 2,
 };
 
+enum LayerType {
+  convolutionType = 0,
+  depthwiseConvolutionType = 1,
+};
+
 template <DeviceType DType1, DeviceType DType2>
 class ConvolutionTest {
 public:
   ConvolutionTest(const std::string& conv1,
                   const std::string& conv2,
+                  LayerType layerType,
                   TestType type,
                   std::string algo = "auto") {
     for (size_t batchSize : {1, 32}) {
@@ -37,7 +43,17 @@ public:
         for (size_t filterSize : {1, 3, 5}) {
           for (size_t inputChannels : {3, 64}) {
             for (size_t outputChannels : {3, 64, 128}) {
-              if (inputChannels < outputChannels) break;
+              if (inputChannels > outputChannels) break;
+              if (layerType == depthwiseConvolutionType &&
+                  outputChannels % inputChannels != 0)
+                break;
+
+              size_t groups = 1;
+
+              if (layerType == depthwiseConvolutionType) {
+                groups = inputChannels;
+              }
+
               for (size_t stride : {1, 2}) {
                 for (size_t padding : {0, 1}) {
                   if (padding >= filterSize) break;
@@ -62,13 +78,24 @@ public:
                       FuncConfig()
                           .set("paddings", paddings)
                           .set("strides", strides)
-                          .set("groups", (size_t)1)
+                          .set("groups", groups)
                           .set("algo", algo));
 
                   TensorShape input{
                       batchSize, inputChannels, inputSize, inputSize};
-                  TensorShape filter{
-                      outputChannels, inputChannels, filterSize, filterSize};
+
+                  TensorShape filter;
+                  if (layerType == depthwiseConvolutionType)
+                    filter = TensorShape({groups,
+                                          outputChannels / groups,
+                                          (size_t)1,
+                                          filterSize,
+                                          filterSize});
+                  else
+                    filter = TensorShape({outputChannels,
+                                          inputChannels,
+                                          filterSize,
+                                          filterSize});
                   TensorShape output{
                       batchSize, outputChannels, outputSize, outputSize};
 
@@ -105,6 +132,7 @@ class ConvolutionTest2 {
 public:
   ConvolutionTest2(const std::string& conv1,
                    const std::string& conv2,
+                   LayerType layerType,
                    TestType type,
                    std::string algo = "auto") {
     for (size_t batchSize : {16}) {
@@ -113,7 +141,16 @@ public:
           for (size_t filterHeight : {1, 5}) {
             for (size_t filterWidth : {3, 7}) {
               for (size_t inputChannels : {7}) {
-                for (size_t outputChannels : {32}) {
+                for (size_t outputChannels : {7, 32}) {
+                  if (layerType == depthwiseConvolutionType &&
+                      outputChannels % inputChannels != 0)
+                    break;
+
+                  size_t groups = 1;
+
+                  if (layerType == depthwiseConvolutionType) {
+                    groups = inputChannels;
+                  }
                   size_t stride = 1;
                   size_t padding = 0;
                   size_t outputHeight =
@@ -141,13 +178,24 @@ public:
                       FuncConfig()
                           .set("paddings", paddings)
                           .set("strides", strides)
-                          .set("groups", (size_t)1)
+                          .set("groups", groups)
                           .set("algo", algo));
 
                   TensorShape input{
                       batchSize, inputChannels, inputHeight, inputWidth};
-                  TensorShape filter{
-                      outputChannels, inputChannels, filterHeight, filterWidth};
+
+                  TensorShape filter;
+                  if (layerType == depthwiseConvolutionType)
+                    filter = TensorShape({groups,
+                                          outputChannels / groups,
+                                          (size_t)1,
+                                          filterHeight,
+                                          filterWidth});
+                  else
+                    filter = TensorShape({outputChannels,
+                                          inputChannels,
+                                          filterHeight,
+                                          filterWidth});
                   TensorShape output{
                       batchSize, outputChannels, outputHeight, outputWidth};
 
@@ -177,183 +225,46 @@ public:
   }
 };
 
-template <DeviceType DType1, DeviceType DType2>
-class DepthwiseConvolutionTest {
-public:
-  DepthwiseConvolutionTest(const std::string& conv1,
-                           const std::string& conv2,
-                           TestType type,
-                           std::string algo = "auto") {
-    for (size_t batchSize : {1, 32}) {
-      for (size_t inputSize : {7, 14, 54}) {
-        for (size_t filterSize : {1, 3, 5}) {
-          for (size_t inputChannels : {64, 128}) {
-            size_t outputChannels = inputChannels;
-            for (size_t stride : {1, 2}) {
-              for (size_t padding : {0, 1}) {
-                if (padding >= filterSize) break;
-                size_t outputSize =
-                    (inputSize - filterSize + 2 * padding + stride) / stride;
-                VLOG(3) << " batchSize=" << batchSize
-                        << " inputChannels=" << inputChannels
-                        << " inputHeight=" << inputSize
-                        << " inputWidth=" << inputSize
-                        << " outputChannels=" << outputChannels
-                        << " filterHeight=" << filterSize
-                        << " filterWidth=" << filterSize
-                        << " outputHeight=" << outputSize
-                        << " outputWidth=" << outputSize << " stride=" << stride
-                        << " padding=" << padding;
-
-                std::vector<size_t> paddings = {padding, padding};
-                std::vector<size_t> strides = {stride, stride};
-                size_t groups = inputChannels;
-                Compare2Function<DType1, DType2> test(
-                    conv1,
-                    conv2,
-                    FuncConfig()
-                        .set("paddings", paddings)
-                        .set("strides", strides)
-                        .set("groups", groups)
-                        .set("algo", algo));
-
-                TensorShape input{
-                    batchSize, inputChannels, inputSize, inputSize};
-                TensorShape filter{inputChannels, 1, 1, filterSize, filterSize};
-                TensorShape output{
-                    batchSize, outputChannels, outputSize, outputSize};
-
-                if (type == kForwardTest) {
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
-                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
-                  test.run();
-                } else if (type == kBackwardInputTest) {
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
-                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
-                  test.run();
-                } else if (type == kBackwardFilterTest) {
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
-                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
-                  test.run();
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-};
-
-// Mainly used to test cases where the height and width (input, filter)
-// are not equal.
-template <DeviceType DType1, DeviceType DType2>
-class DepthwiseConvolutionTest2 {
-public:
-  DepthwiseConvolutionTest2(const std::string& conv1,
-                            const std::string& conv2,
-                            TestType type,
-                            std::string algo = "auto") {
-    for (size_t batchSize : {16}) {
-      for (size_t inputHeight : {7, 31}) {
-        for (size_t inputWidth : {10, 54}) {
-          for (size_t filterHeight : {1, 5}) {
-            for (size_t filterWidth : {3, 7}) {
-              for (size_t inputChannels : {32}) {
-                size_t outputChannels = inputChannels;
-                size_t stride = 1;
-                size_t padding = 0;
-                size_t outputHeight =
-                    (inputHeight - filterHeight + 2 * padding + stride) /
-                    stride;
-                size_t outputWidth =
-                    (inputWidth - filterWidth + 2 * padding + stride) / stride;
-                VLOG(3) << " batchSize=" << batchSize
-                        << " inputChannels=" << inputChannels
-                        << " inputHeight=" << inputHeight
-                        << " inputWidth=" << inputWidth
-                        << " outputChannels=" << outputChannels
-                        << " filterHeight=" << filterHeight
-                        << " filterWidth=" << filterWidth
-                        << " outputHeight=" << outputHeight
-                        << " outputWidth=" << outputWidth
-                        << " stride=" << stride << " padding=" << padding;
-
-                std::vector<size_t> paddings = {padding, padding};
-                std::vector<size_t> strides = {stride, stride};
-                size_t groups = inputChannels;
-                Compare2Function<DType1, DType2> test(
-                    conv1,
-                    conv2,
-                    FuncConfig()
-                        .set("paddings", paddings)
-                        .set("strides", strides)
-                        .set("groups", groups)
-                        .set("algo", algo));
-
-                TensorShape input{
-                    batchSize, inputChannels, inputHeight, inputWidth};
-                TensorShape filter{
-                    inputChannels, 1, 1, filterHeight, filterWidth};
-                TensorShape output{
-                    batchSize, outputChannels, outputHeight, outputWidth};
-
-                if (type == kForwardTest) {
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
-                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
-                  test.run();
-                } else if (type == kBackwardInputTest) {
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
-                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
-                  test.run();
-                } else if (type == kBackwardFilterTest) {
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
-                  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
-                  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
-                  test.run();
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-};
-
 // ======Start Convolution TEST======
 TEST(Forward, GEMM) {
   ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
-      "NaiveConv-CPU", "GemmConv-CPU", kForwardTest);
+      "NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
   ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
-      "NaiveConv-CPU", "GemmConv-CPU", kForwardTest);
+      "NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
 }
 
 #ifndef PADDLE_ONLY_CPU
 TEST(Forward, GEMM2) {
   ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
-      "GemmConv-CPU", "GemmConv-GPU", kForwardTest);
+      "GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
   ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
-      "GemmConv-CPU", "GemmConv-GPU", kForwardTest);
+      "GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
 }
 
 TEST(BackwardInput, GEMM) {
   ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
-      "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
+      "GemmConvGradInput-CPU",
+      "GemmConvGradInput-GPU",
+      convolutionType,
+      kBackwardInputTest);
   ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
-      "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
+      "GemmConvGradInput-CPU",
+      "GemmConvGradInput-GPU",
+      convolutionType,
+      kBackwardInputTest);
 }
 
 TEST(BackwardFilter, GEMM) {
   ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
-      "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
+      "GemmConvGradFilter-CPU",
+      "GemmConvGradFilter-GPU",
+      convolutionType,
+      kBackwardFilterTest);
   ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
-      "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
+      "GemmConvGradFilter-CPU",
+      "GemmConvGradFilter-GPU",
+      convolutionType,
+      kBackwardFilterTest);
 }
 #endif
 // ======End Convolution TEST======
@@ -364,38 +275,54 @@ TEST(BackwardFilter, GEMM) {
 
 #ifndef PADDLE_ONLY_CPU
 TEST(DepthwiseConvForward, GEMM) {
-  DepthwiseConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
-      "GemmConv-GPU", "DepthwiseConv-GPU", kForwardTest);
-  DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
-      "GemmConv-GPU", "DepthwiseConv-GPU", kForwardTest);
+  ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
+      "GemmConv-GPU",
+      "DepthwiseConv-GPU",
+      depthwiseConvolutionType,
+      kForwardTest);
+  ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
+      "GemmConv-GPU",
+      "DepthwiseConv-GPU",
+      depthwiseConvolutionType,
+      kForwardTest);
 }
 
 TEST(DepthwiseConvForward, GEMM2) {
-  DepthwiseConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
-      "DepthwiseConv-GPU", "DepthwiseConv-GPU", kForwardTest);
-  DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
-      "DepthwiseConv-GPU", "DepthwiseConv-GPU", kForwardTest);
+  ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
+      "DepthwiseConv-GPU",
+      "DepthwiseConv-GPU",
+      depthwiseConvolutionType,
+      kForwardTest);
+  ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
+      "DepthwiseConv-GPU",
+      "DepthwiseConv-GPU",
+      depthwiseConvolutionType,
+      kForwardTest);
 }
 
 TEST(DepthwiseConvBackwardInput, GEMM) {
-  DepthwiseConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
+  ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
       "DepthwiseConvGradInput-GPU",
       "DepthwiseConvGradInput-GPU",
+      depthwiseConvolutionType,
       kBackwardInputTest);
-  DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
+  ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
       "DepthwiseConvGradInput-GPU",
       "DepthwiseConvGradInput-GPU",
+      depthwiseConvolutionType,
       kBackwardInputTest);
 }
 
 TEST(DepthwiseConvBackwardFilter, GEMM) {
-  DepthwiseConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
+  ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
       "DepthwiseConvGradFilter-GPU",
       "DepthwiseConvGradFilter-GPU",
+      depthwiseConvolutionType,
       kBackwardFilterTest);
-  DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
+  ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
       "DepthwiseConvGradFilter-GPU",
       "DepthwiseConvGradFilter-GPU",
+      depthwiseConvolutionType,
       kBackwardFilterTest);
 }
 #endif
-- 
GitLab