提交 02e04b44 编写于 作者: X xzl

fuse the conv and depthwise conv together

上级 6267312a
...@@ -25,11 +25,17 @@ enum TestType { ...@@ -25,11 +25,17 @@ enum TestType {
kBackwardFilterTest = 2, kBackwardFilterTest = 2,
}; };
// Selects which kind of layer a convolution test case is generated for.
// The test classes that take a LayerType use it to choose the "groups"
// setting and the filter tensor shape.
enum LayerType {
  // Ordinary convolution: tests run with groups == 1. Value 0.
  convolutionType,
  // Depthwise convolution: tests run with groups == inputChannels. Value 1.
  depthwiseConvolutionType,
};
template <DeviceType DType1, DeviceType DType2> template <DeviceType DType1, DeviceType DType2>
class ConvolutionTest { class ConvolutionTest {
public: public:
ConvolutionTest(const std::string& conv1, ConvolutionTest(const std::string& conv1,
const std::string& conv2, const std::string& conv2,
LayerType layerType,
TestType type, TestType type,
std::string algo = "auto") { std::string algo = "auto") {
for (size_t batchSize : {1, 32}) { for (size_t batchSize : {1, 32}) {
...@@ -37,7 +43,17 @@ public: ...@@ -37,7 +43,17 @@ public:
for (size_t filterSize : {1, 3, 5}) { for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {3, 64}) { for (size_t inputChannels : {3, 64}) {
for (size_t outputChannels : {3, 64, 128}) { for (size_t outputChannels : {3, 64, 128}) {
if (inputChannels < outputChannels) break; if (inputChannels > outputChannels) break;
if (layerType == depthwiseConvolutionType &&
outputChannels % inputChannels != 0)
break;
size_t groups = 1;
if (layerType == depthwiseConvolutionType) {
groups = inputChannels;
}
for (size_t stride : {1, 2}) { for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) { for (size_t padding : {0, 1}) {
if (padding >= filterSize) break; if (padding >= filterSize) break;
...@@ -62,13 +78,24 @@ public: ...@@ -62,13 +78,24 @@ public:
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)1) .set("groups", groups)
.set("algo", algo)); .set("algo", algo));
TensorShape input{ TensorShape input{
batchSize, inputChannels, inputSize, inputSize}; batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize}; TensorShape filter;
if (layerType == depthwiseConvolutionType)
filter = TensorShape({groups,
outputChannels / groups,
(size_t)1,
filterSize,
filterSize});
else
filter = TensorShape({outputChannels,
inputChannels,
filterSize,
filterSize});
TensorShape output{ TensorShape output{
batchSize, outputChannels, outputSize, outputSize}; batchSize, outputChannels, outputSize, outputSize};
...@@ -105,6 +132,7 @@ class ConvolutionTest2 { ...@@ -105,6 +132,7 @@ class ConvolutionTest2 {
public: public:
ConvolutionTest2(const std::string& conv1, ConvolutionTest2(const std::string& conv1,
const std::string& conv2, const std::string& conv2,
LayerType layerType,
TestType type, TestType type,
std::string algo = "auto") { std::string algo = "auto") {
for (size_t batchSize : {16}) { for (size_t batchSize : {16}) {
...@@ -113,7 +141,16 @@ public: ...@@ -113,7 +141,16 @@ public:
for (size_t filterHeight : {1, 5}) { for (size_t filterHeight : {1, 5}) {
for (size_t filterWidth : {3, 7}) { for (size_t filterWidth : {3, 7}) {
for (size_t inputChannels : {7}) { for (size_t inputChannels : {7}) {
for (size_t outputChannels : {32}) { for (size_t outputChannels : {7, 32}) {
if (layerType == depthwiseConvolutionType &&
outputChannels % inputChannels != 0)
break;
size_t groups = 1;
if (layerType == depthwiseConvolutionType) {
groups = inputChannels;
}
size_t stride = 1; size_t stride = 1;
size_t padding = 0; size_t padding = 0;
size_t outputHeight = size_t outputHeight =
...@@ -141,13 +178,24 @@ public: ...@@ -141,13 +178,24 @@ public:
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)1) .set("groups", groups)
.set("algo", algo)); .set("algo", algo));
TensorShape input{ TensorShape input{
batchSize, inputChannels, inputHeight, inputWidth}; batchSize, inputChannels, inputHeight, inputWidth};
TensorShape filter{
outputChannels, inputChannels, filterHeight, filterWidth}; TensorShape filter;
if (layerType == depthwiseConvolutionType)
filter = TensorShape({groups,
outputChannels / groups,
(size_t)1,
filterHeight,
filterWidth});
else
filter = TensorShape({outputChannels,
inputChannels,
filterHeight,
filterWidth});
TensorShape output{ TensorShape output{
batchSize, outputChannels, outputHeight, outputWidth}; batchSize, outputChannels, outputHeight, outputWidth};
...@@ -177,183 +225,46 @@ public: ...@@ -177,183 +225,46 @@ public:
} }
}; };
// Sweeps a grid of square-input / square-filter configurations and, for each
// one, compares the two named functions through Compare2Function.
// Every case uses outputChannels == inputChannels and groups == inputChannels.
template <DeviceType DType1, DeviceType DType2>
class DepthwiseConvolutionTest {
public:
  // conv1, conv2: names of the two functions handed to Compare2Function.
  // type:         which argument layout is exercised (forward,
  //               backward-input or backward-filter).
  // algo:         stored under the "algo" key of the FuncConfig.
  DepthwiseConvolutionTest(const std::string& conv1,
                           const std::string& conv2,
                           TestType type,
                           std::string algo = "auto") {
    // Builds and runs the comparison for a single point of the grid.
    auto runCase = [&](size_t batchSize,
                       size_t inputSize,
                       size_t filterSize,
                       size_t inputChannels,
                       size_t stride,
                       size_t padding) {
      size_t outputChannels = inputChannels;
      size_t outputSize =
          (inputSize - filterSize + 2 * padding + stride) / stride;

      VLOG(3) << " batchSize=" << batchSize
              << " inputChannels=" << inputChannels
              << " inputHeight=" << inputSize
              << " inputWidth=" << inputSize
              << " outputChannels=" << outputChannels
              << " filterHeight=" << filterSize
              << " filterWidth=" << filterSize
              << " outputHeight=" << outputSize
              << " outputWidth=" << outputSize << " stride=" << stride
              << " padding=" << padding;

      // Same padding / stride is applied along both spatial axes.
      std::vector<size_t> paddings = {padding, padding};
      std::vector<size_t> strides = {stride, stride};
      size_t groups = inputChannels;

      FuncConfig config;
      config.set("paddings", paddings)
          .set("strides", strides)
          .set("groups", groups)
          .set("algo", algo);
      Compare2Function<DType1, DType2> test(conv1, conv2, config);

      TensorShape input{batchSize, inputChannels, inputSize, inputSize};
      // Five-dimensional filter shape; the two middle extents are fixed at 1.
      TensorShape filter{inputChannels, 1, 1, filterSize, filterSize};
      TensorShape output{batchSize, outputChannels, outputSize, outputSize};

      switch (type) {
        case kForwardTest:
          // (input, filter) -> output
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
          test.run();
          break;
        case kBackwardInputTest:
          // (output, filter) -> input, registered with ADD_TO
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
          test.run();
          break;
        case kBackwardFilterTest:
          // (output, input) -> filter
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
          test.run();
          break;
        default:
          // Any other TestType value: nothing is run.
          break;
      }
    };

    for (size_t batchSize : {1, 32}) {
      for (size_t inputSize : {7, 14, 54}) {
        for (size_t filterSize : {1, 3, 5}) {
          for (size_t inputChannels : {64, 128}) {
            for (size_t stride : {1, 2}) {
              for (size_t padding : {0, 1}) {
                // Stop once the padding is no longer smaller than the filter.
                if (padding >= filterSize) break;
                runCase(batchSize,
                        inputSize,
                        filterSize,
                        inputChannels,
                        stride,
                        padding);
              }
            }
          }
        }
      }
    }
  }
};
// Companion to the square-shape depthwise test: covers cases where the
// height and width of the input and of the filter differ from each other.
// Stride is fixed at 1 and padding at 0; every case uses
// outputChannels == inputChannels and groups == inputChannels.
template <DeviceType DType1, DeviceType DType2>
class DepthwiseConvolutionTest2 {
public:
  // conv1, conv2: names of the two functions handed to Compare2Function.
  // type:         which argument layout is exercised (forward,
  //               backward-input or backward-filter).
  // algo:         stored under the "algo" key of the FuncConfig.
  DepthwiseConvolutionTest2(const std::string& conv1,
                            const std::string& conv2,
                            TestType type,
                            std::string algo = "auto") {
    // Builds and runs the comparison for a single point of the grid.
    auto runCase = [&](size_t batchSize,
                       size_t inputHeight,
                       size_t inputWidth,
                       size_t filterHeight,
                       size_t filterWidth,
                       size_t inputChannels) {
      size_t outputChannels = inputChannels;
      size_t stride = 1;
      size_t padding = 0;
      size_t outputHeight =
          (inputHeight - filterHeight + 2 * padding + stride) / stride;
      size_t outputWidth =
          (inputWidth - filterWidth + 2 * padding + stride) / stride;

      VLOG(3) << " batchSize=" << batchSize
              << " inputChannels=" << inputChannels
              << " inputHeight=" << inputHeight
              << " inputWidth=" << inputWidth
              << " outputChannels=" << outputChannels
              << " filterHeight=" << filterHeight
              << " filterWidth=" << filterWidth
              << " outputHeight=" << outputHeight
              << " outputWidth=" << outputWidth
              << " stride=" << stride << " padding=" << padding;

      // Same padding / stride is applied along both spatial axes.
      std::vector<size_t> paddings = {padding, padding};
      std::vector<size_t> strides = {stride, stride};
      size_t groups = inputChannels;

      FuncConfig config;
      config.set("paddings", paddings)
          .set("strides", strides)
          .set("groups", groups)
          .set("algo", algo);
      Compare2Function<DType1, DType2> test(conv1, conv2, config);

      TensorShape input{batchSize, inputChannels, inputHeight, inputWidth};
      // Five-dimensional filter shape; the two middle extents are fixed at 1.
      TensorShape filter{inputChannels, 1, 1, filterHeight, filterWidth};
      TensorShape output{
          batchSize, outputChannels, outputHeight, outputWidth};

      switch (type) {
        case kForwardTest:
          // (input, filter) -> output
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
          test.run();
          break;
        case kBackwardInputTest:
          // (output, filter) -> input, registered with ADD_TO
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
          test.run();
          break;
        case kBackwardFilterTest:
          // (output, input) -> filter
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
          test.run();
          break;
        default:
          // Any other TestType value: nothing is run.
          break;
      }
    };

    for (size_t batchSize : {16}) {
      for (size_t inputHeight : {7, 31}) {
        for (size_t inputWidth : {10, 54}) {
          for (size_t filterHeight : {1, 5}) {
            for (size_t filterWidth : {3, 7}) {
              for (size_t inputChannels : {32}) {
                runCase(batchSize,
                        inputHeight,
                        inputWidth,
                        filterHeight,
                        filterWidth,
                        inputChannels);
              }
            }
          }
        }
      }
    }
  }
};
// ======Start Convolution TEST====== // ======Start Convolution TEST======
TEST(Forward, GEMM) { TEST(Forward, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest); "NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest); "NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
} }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
TEST(Forward, GEMM2) { TEST(Forward, GEMM2) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest); "GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest); "GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
} }
TEST(BackwardInput, GEMM) { TEST(BackwardInput, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest); "GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
convolutionType,
kBackwardInputTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest); "GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
convolutionType,
kBackwardInputTest);
} }
TEST(BackwardFilter, GEMM) { TEST(BackwardFilter, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest); "GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
convolutionType,
kBackwardFilterTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest); "GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
convolutionType,
kBackwardFilterTest);
} }
#endif #endif
// ======End Convolution TEST====== // ======End Convolution TEST======
...@@ -364,38 +275,54 @@ TEST(BackwardFilter, GEMM) { ...@@ -364,38 +275,54 @@ TEST(BackwardFilter, GEMM) {
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
TEST(DepthwiseConvForward, GEMM) { TEST(DepthwiseConvForward, GEMM) {
DepthwiseConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
"GemmConv-GPU", "DepthwiseConv-GPU", kForwardTest); "GemmConv-GPU",
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( "DepthwiseConv-GPU",
"GemmConv-GPU", "DepthwiseConv-GPU", kForwardTest); depthwiseConvolutionType,
kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-GPU",
"DepthwiseConv-GPU",
depthwiseConvolutionType,
kForwardTest);
} }
TEST(DepthwiseConvForward, GEMM2) { TEST(DepthwiseConvForward, GEMM2) {
DepthwiseConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
"DepthwiseConv-GPU", "DepthwiseConv-GPU", kForwardTest); "DepthwiseConv-GPU",
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( "DepthwiseConv-GPU",
"DepthwiseConv-GPU", "DepthwiseConv-GPU", kForwardTest); depthwiseConvolutionType,
kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"DepthwiseConv-GPU",
"DepthwiseConv-GPU",
depthwiseConvolutionType,
kForwardTest);
} }
TEST(DepthwiseConvBackwardInput, GEMM) { TEST(DepthwiseConvBackwardInput, GEMM) {
DepthwiseConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"DepthwiseConvGradInput-GPU", "DepthwiseConvGradInput-GPU",
"DepthwiseConvGradInput-GPU", "DepthwiseConvGradInput-GPU",
depthwiseConvolutionType,
kBackwardInputTest); kBackwardInputTest);
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"DepthwiseConvGradInput-GPU", "DepthwiseConvGradInput-GPU",
"DepthwiseConvGradInput-GPU", "DepthwiseConvGradInput-GPU",
depthwiseConvolutionType,
kBackwardInputTest); kBackwardInputTest);
} }
TEST(DepthwiseConvBackwardFilter, GEMM) { TEST(DepthwiseConvBackwardFilter, GEMM) {
DepthwiseConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test( ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"DepthwiseConvGradFilter-GPU", "DepthwiseConvGradFilter-GPU",
"DepthwiseConvGradFilter-GPU", "DepthwiseConvGradFilter-GPU",
depthwiseConvolutionType,
kBackwardFilterTest); kBackwardFilterTest);
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2( ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"DepthwiseConvGradFilter-GPU", "DepthwiseConvGradFilter-GPU",
"DepthwiseConvGradFilter-GPU", "DepthwiseConvGradFilter-GPU",
depthwiseConvolutionType,
kBackwardFilterTest); kBackwardFilterTest);
} }
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册