提交 02e04b44 编写于 作者: X xzl

fuse the conv and depthwise conv together

上级 6267312a
......@@ -25,11 +25,17 @@ enum TestType {
kBackwardFilterTest = 2,
};
enum LayerType {
convolutionType = 0,
depthwiseConvolutionType = 1,
};
template <DeviceType DType1, DeviceType DType2>
class ConvolutionTest {
public:
ConvolutionTest(const std::string& conv1,
const std::string& conv2,
LayerType layerType,
TestType type,
std::string algo = "auto") {
for (size_t batchSize : {1, 32}) {
......@@ -37,7 +43,17 @@ public:
for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {3, 64}) {
for (size_t outputChannels : {3, 64, 128}) {
if (inputChannels < outputChannels) break;
if (inputChannels > outputChannels) break;
if (layerType == depthwiseConvolutionType &&
outputChannels % inputChannels != 0)
break;
size_t groups = 1;
if (layerType == depthwiseConvolutionType) {
groups = inputChannels;
}
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
......@@ -62,13 +78,24 @@ public:
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("groups", groups)
.set("algo", algo));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape filter;
if (layerType == depthwiseConvolutionType)
filter = TensorShape({groups,
outputChannels / groups,
(size_t)1,
filterSize,
filterSize});
else
filter = TensorShape({outputChannels,
inputChannels,
filterSize,
filterSize});
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
......@@ -105,6 +132,7 @@ class ConvolutionTest2 {
public:
ConvolutionTest2(const std::string& conv1,
const std::string& conv2,
LayerType layerType,
TestType type,
std::string algo = "auto") {
for (size_t batchSize : {16}) {
......@@ -113,164 +141,24 @@ public:
for (size_t filterHeight : {1, 5}) {
for (size_t filterWidth : {3, 7}) {
for (size_t inputChannels : {7}) {
for (size_t outputChannels : {32}) {
size_t stride = 1;
size_t padding = 0;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) /
stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputHeight
<< " inputWidth=" << inputWidth
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterHeight
<< " filterWidth=" << filterWidth
<< " outputHeight=" << outputHeight
<< " outputWidth=" << outputWidth
<< " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", algo));
TensorShape input{
batchSize, inputChannels, inputHeight, inputWidth};
TensorShape filter{
outputChannels, inputChannels, filterHeight, filterWidth};
TensorShape output{
batchSize, outputChannels, outputHeight, outputWidth};
if (type == kForwardTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.run();
} else if (type == kBackwardInputTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
test.run();
} else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.run();
}
}
}
}
}
}
}
}
}
};
template <DeviceType DType1, DeviceType DType2>
class DepthwiseConvolutionTest {
public:
DepthwiseConvolutionTest(const std::string& conv1,
const std::string& conv2,
TestType type,
std::string algo = "auto") {
for (size_t batchSize : {1, 32}) {
for (size_t inputSize : {7, 14, 54}) {
for (size_t filterSize : {1, 3, 5}) {
for (size_t inputChannels : {64, 128}) {
size_t outputChannels = inputChannels;
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride
<< " padding=" << padding;
for (size_t outputChannels : {7, 32}) {
if (layerType == depthwiseConvolutionType &&
outputChannels % inputChannels != 0)
break;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
size_t groups = inputChannels;
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", groups)
.set("algo", algo));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{inputChannels, 1, 1, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
size_t groups = 1;
if (type == kForwardTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.run();
} else if (type == kBackwardInputTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
test.run();
} else if (type == kBackwardFilterTest) {
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter));
test.run();
}
}
}
}
}
}
}
if (layerType == depthwiseConvolutionType) {
groups = inputChannels;
}
};
// Mainly used to test cases where the height and width (input, filter)
// are not equal.
template <DeviceType DType1, DeviceType DType2>
class DepthwiseConvolutionTest2 {
public:
DepthwiseConvolutionTest2(const std::string& conv1,
const std::string& conv2,
TestType type,
std::string algo = "auto") {
for (size_t batchSize : {16}) {
for (size_t inputHeight : {7, 31}) {
for (size_t inputWidth : {10, 54}) {
for (size_t filterHeight : {1, 5}) {
for (size_t filterWidth : {3, 7}) {
for (size_t inputChannels : {32}) {
size_t outputChannels = inputChannels;
size_t stride = 1;
size_t padding = 0;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) / stride;
(inputWidth - filterWidth + 2 * padding + stride) /
stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputHeight
......@@ -284,7 +172,6 @@ public:
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
size_t groups = inputChannels;
Compare2Function<DType1, DType2> test(
conv1,
conv2,
......@@ -296,8 +183,19 @@ public:
TensorShape input{
batchSize, inputChannels, inputHeight, inputWidth};
TensorShape filter{
inputChannels, 1, 1, filterHeight, filterWidth};
TensorShape filter;
if (layerType == depthwiseConvolutionType)
filter = TensorShape({groups,
outputChannels / groups,
(size_t)1,
filterHeight,
filterWidth});
else
filter = TensorShape({outputChannels,
inputChannels,
filterHeight,
filterWidth});
TensorShape output{
batchSize, outputChannels, outputHeight, outputWidth};
......@@ -324,36 +222,49 @@ public:
}
}
}
}
};
// ======Start Convolution TEST======
TEST(Forward, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest);
"NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest);
"NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
}
#ifndef PADDLE_ONLY_CPU
TEST(Forward, GEMM2) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest);
"GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-CPU", "GemmConv-GPU", kForwardTest);
"GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
}
TEST(BackwardInput, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
"GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
convolutionType,
kBackwardInputTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
"GemmConvGradInput-CPU",
"GemmConvGradInput-GPU",
convolutionType,
kBackwardInputTest);
}
TEST(BackwardFilter, GEMM) {
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
"GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
convolutionType,
kBackwardFilterTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
"GemmConvGradFilter-CPU",
"GemmConvGradFilter-GPU",
convolutionType,
kBackwardFilterTest);
}
#endif
// ======End Convolution TEST======
......@@ -364,38 +275,54 @@ TEST(BackwardFilter, GEMM) {
#ifndef PADDLE_ONLY_CPU
TEST(DepthwiseConvForward, GEMM) {
DepthwiseConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
"GemmConv-GPU", "DepthwiseConv-GPU", kForwardTest);
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-GPU", "DepthwiseConv-GPU", kForwardTest);
ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
"GemmConv-GPU",
"DepthwiseConv-GPU",
depthwiseConvolutionType,
kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"GemmConv-GPU",
"DepthwiseConv-GPU",
depthwiseConvolutionType,
kForwardTest);
}
TEST(DepthwiseConvForward, GEMM2) {
DepthwiseConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
"DepthwiseConv-GPU", "DepthwiseConv-GPU", kForwardTest);
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"DepthwiseConv-GPU", "DepthwiseConv-GPU", kForwardTest);
ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
"DepthwiseConv-GPU",
"DepthwiseConv-GPU",
depthwiseConvolutionType,
kForwardTest);
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"DepthwiseConv-GPU",
"DepthwiseConv-GPU",
depthwiseConvolutionType,
kForwardTest);
}
TEST(DepthwiseConvBackwardInput, GEMM) {
DepthwiseConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"DepthwiseConvGradInput-GPU",
"DepthwiseConvGradInput-GPU",
depthwiseConvolutionType,
kBackwardInputTest);
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"DepthwiseConvGradInput-GPU",
"DepthwiseConvGradInput-GPU",
depthwiseConvolutionType,
kBackwardInputTest);
}
TEST(DepthwiseConvBackwardFilter, GEMM) {
DepthwiseConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
"DepthwiseConvGradFilter-GPU",
"DepthwiseConvGradFilter-GPU",
depthwiseConvolutionType,
kBackwardFilterTest);
DepthwiseConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
"DepthwiseConvGradFilter-GPU",
"DepthwiseConvGradFilter-GPU",
depthwiseConvolutionType,
kBackwardFilterTest);
}
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部