提交 e5cbeb02 编写于 作者: W whs 提交者: GitHub

Merge pull request #3629 from wanghaoshuang/dilate_conv

Make cudnn convolution layer and projection support for dilation
...@@ -214,7 +214,8 @@ extern void hl_conv_workspace(hl_tensor_descriptor input, ...@@ -214,7 +214,8 @@ extern void hl_conv_workspace(hl_tensor_descriptor input,
int* convBwdDataAlgo, int* convBwdDataAlgo,
size_t* bwdDataLimitBytes, size_t* bwdDataLimitBytes,
int* convBwdFilterAlgo, int* convBwdFilterAlgo,
size_t* bwdFilterLimitBytes); size_t* bwdFilterLimitBytes,
bool useDilation);
/** /**
* @brief destroy filter descriptor. * @brief destroy filter descriptor.
...@@ -242,7 +243,9 @@ extern void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, ...@@ -242,7 +243,9 @@ extern void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width); int stride_width,
int dilation_h = 1,
int dilation_w = 1);
/** /**
* @brief reset convolution descriptor. * @brief reset convolution descriptor.
...@@ -262,7 +265,9 @@ extern void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, ...@@ -262,7 +265,9 @@ extern void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width); int stride_width,
int dilation_h = 1,
int dilation_w = 1);
/** /**
* @brief destroy convolution descriptor. * @brief destroy convolution descriptor.
......
...@@ -78,7 +78,9 @@ inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, ...@@ -78,7 +78,9 @@ inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width) {} int stride_width,
int dilation_h,
int dilation_w) {}
inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
hl_tensor_descriptor image, hl_tensor_descriptor image,
...@@ -86,7 +88,9 @@ inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, ...@@ -86,7 +88,9 @@ inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width) {} int stride_width,
int dilation_h,
int dilation_w) {}
inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {} inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {}
...@@ -99,7 +103,8 @@ inline void hl_conv_workspace(hl_tensor_descriptor input, ...@@ -99,7 +103,8 @@ inline void hl_conv_workspace(hl_tensor_descriptor input,
int* convBwdDataAlgo, int* convBwdDataAlgo,
size_t* bwdDataLimitBytes, size_t* bwdDataLimitBytes,
int* convBwdFilterAlgo, int* convBwdFilterAlgo,
size_t* bwdFilterLimitBytes) {} size_t* bwdFilterLimitBytes,
bool useDilation) {}
inline void hl_convolution_forward(hl_tensor_descriptor input, inline void hl_convolution_forward(hl_tensor_descriptor input,
real* input_data, real* input_data,
......
...@@ -201,7 +201,8 @@ void hl_conv_workspace(hl_tensor_descriptor input, ...@@ -201,7 +201,8 @@ void hl_conv_workspace(hl_tensor_descriptor input,
int* convBwdDataAlgo, int* convBwdDataAlgo,
size_t* bwdDataLimitBytes, size_t* bwdDataLimitBytes,
int* convBwdFilterAlgo, int* convBwdFilterAlgo,
size_t* bwdFilterLimitBytes) { size_t* bwdFilterLimitBytes,
bool useDilation) {
#if CUDNN_VERSION >= 4000 #if CUDNN_VERSION >= 4000
CHECK_NOTNULL(input); CHECK_NOTNULL(input);
...@@ -213,21 +214,60 @@ void hl_conv_workspace(hl_tensor_descriptor input, ...@@ -213,21 +214,60 @@ void hl_conv_workspace(hl_tensor_descriptor input,
size_t memoryLimitBytes = size_t memoryLimitBytes =
(1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb; (1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb;
// For dilation
int algo = 0;
// cudnn convolution forward configuration // cudnn convolution forward configuration
cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input); cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output); cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter); cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter);
cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
// cudnn convolution backward data configuration
cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter);
cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnConvolutionDescriptor_t bwd_data_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
// cudnn convolution backward filter configuration
cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnConvolutionDescriptor_t bwd_filter_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter);
CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( if (useDilation) {
t_resource.cudnn_handle, convFwdAlgo = &algo;
fwd_src_desc, convBwdDataAlgo = &algo;
fwd_filter_desc, convBwdFilterAlgo = &algo;
fwd_conv_desc, } else {
fwd_dest_desc, CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm(
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, t_resource.cudnn_handle,
memoryLimitBytes, fwd_src_desc,
reinterpret_cast<cudnnConvolutionFwdAlgo_t*>(convFwdAlgo))); fwd_filter_desc,
fwd_conv_desc,
fwd_dest_desc,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionFwdAlgo_t*>(convFwdAlgo)));
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm(
t_resource.cudnn_handle,
bwd_data_filter_desc,
bwd_data_diff_desc,
bwd_data_conv_desc,
bwd_data_grad_desc,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionBwdDataAlgo_t*>(convBwdDataAlgo)));
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
t_resource.cudnn_handle,
bwd_filter_src_desc,
bwd_filter_diff_desc,
bwd_filter_conv_desc,
bwd_filter_grad_desc,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionBwdFilterAlgo_t*>(convBwdFilterAlgo)));
}
CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize( CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize(
t_resource.cudnn_handle, t_resource.cudnn_handle,
...@@ -238,23 +278,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, ...@@ -238,23 +278,6 @@ void hl_conv_workspace(hl_tensor_descriptor input,
static_cast<cudnnConvolutionFwdAlgo_t>(*convFwdAlgo), static_cast<cudnnConvolutionFwdAlgo_t>(*convFwdAlgo),
fwdLimitBytes)); fwdLimitBytes));
// cudnn convolution backward data configuration
cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter);
cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnConvolutionDescriptor_t bwd_data_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm(
t_resource.cudnn_handle,
bwd_data_filter_desc,
bwd_data_diff_desc,
bwd_data_conv_desc,
bwd_data_grad_desc,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionBwdDataAlgo_t*>(convBwdDataAlgo)));
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
t_resource.cudnn_handle, t_resource.cudnn_handle,
bwd_data_filter_desc, bwd_data_filter_desc,
...@@ -264,23 +287,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, ...@@ -264,23 +287,6 @@ void hl_conv_workspace(hl_tensor_descriptor input,
static_cast<cudnnConvolutionBwdDataAlgo_t>(*convBwdDataAlgo), static_cast<cudnnConvolutionBwdDataAlgo_t>(*convBwdDataAlgo),
bwdDataLimitBytes)); bwdDataLimitBytes));
// cudnn convolution backward filter configuration
cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnConvolutionDescriptor_t bwd_filter_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter);
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
t_resource.cudnn_handle,
bwd_filter_src_desc,
bwd_filter_diff_desc,
bwd_filter_conv_desc,
bwd_filter_grad_desc,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionBwdFilterAlgo_t*>(convBwdFilterAlgo)));
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
t_resource.cudnn_handle, t_resource.cudnn_handle,
bwd_filter_src_desc, bwd_filter_src_desc,
...@@ -603,7 +609,9 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, ...@@ -603,7 +609,9 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width) { int stride_width,
int dilation_h,
int dilation_w) {
CHECK_NOTNULL(conv); CHECK_NOTNULL(conv);
cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc( cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc(
...@@ -625,18 +633,24 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, ...@@ -625,18 +633,24 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
padding_width, padding_width,
stride_height, stride_height,
stride_width, stride_width,
1, dilation_h,
1, dilation_w,
mode, mode,
data_type)); data_type));
#else #else
if (dilation_h > 1 || dilation_w > 1) {
LOG(FATAL)
<< "Current cuDNN version does't support for dilation convolution. "
<< "The dilation convolution requires cuDNN >= v6.0.";
}
CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc,
padding_height, padding_height,
padding_width, padding_width,
stride_height, stride_height,
stride_width, stride_width,
1, dilation_h,
1, dilation_w,
mode)); mode));
#endif #endif
...@@ -659,7 +673,9 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, ...@@ -659,7 +673,9 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int padding_height, int padding_height,
int padding_width, int padding_width,
int stride_height, int stride_height,
int stride_width) { int stride_width,
int dilation_h,
int dilation_w) {
CHECK_NOTNULL(conv); CHECK_NOTNULL(conv);
CHECK_NOTNULL(image); CHECK_NOTNULL(image);
CHECK_NOTNULL(filter); CHECK_NOTNULL(filter);
...@@ -678,8 +694,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, ...@@ -678,8 +694,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
padding_width, padding_width,
stride_height, stride_height,
stride_width, stride_width,
1, dilation_h,
1, dilation_w,
mode, mode,
data_type)); data_type));
#else #else
...@@ -688,8 +704,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, ...@@ -688,8 +704,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
padding_width, padding_width,
stride_height, stride_height,
stride_width, stride_width,
1, dilation_h,
1, dilation_w,
mode)); mode));
#endif #endif
......
...@@ -32,9 +32,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, ...@@ -32,9 +32,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
const ConvConfig& conf = inputConfig.conv_conf(); const ConvConfig& conf = inputConfig.conv_conf();
padding_.push_back(conf.padding()); padding_.push_back(conf.padding());
stride_.push_back(conf.stride()); stride_.push_back(conf.stride());
dilation_.push_back(conf.dilation());
filterSize_.push_back(conf.filter_size()); filterSize_.push_back(conf.filter_size());
paddingY_.push_back(conf.padding_y()); paddingY_.push_back(conf.padding_y());
strideY_.push_back(conf.stride_y()); strideY_.push_back(conf.stride_y());
dilationY_.push_back(conf.dilation_y());
filterSizeY_.push_back(conf.filter_size_y()); filterSizeY_.push_back(conf.filter_size_y());
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
channels_.push_back(conf.channels()); channels_.push_back(conf.channels());
...@@ -89,7 +91,11 @@ size_t ConvBaseLayer::calOutputSize() { ...@@ -89,7 +91,11 @@ size_t ConvBaseLayer::calOutputSize() {
size_t layerSize = 0; size_t layerSize = 0;
auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) {
size_t filterSizeY;
size_t filterSize;
for (size_t i = 0; i < inputLayers_.size(); i++) { for (size_t i = 0; i < inputLayers_.size(); i++) {
filterSizeY = (filterSizeY_[i] - 1) * dilationY_[i] + 1;
filterSize = (filterSize_[i] - 1) * dilation_[i] + 1;
inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
const ConvConfig& conf = config_.inputs(i).conv_conf(); const ConvConfig& conf = config_.inputs(i).conv_conf();
...@@ -98,17 +104,17 @@ size_t ConvBaseLayer::calOutputSize() { ...@@ -98,17 +104,17 @@ size_t ConvBaseLayer::calOutputSize() {
inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x(); inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x();
if (inW[i] == 0) inW[i] = conf.output_x(); if (inW[i] == 0) inW[i] = conf.output_x();
outH.push_back(imageSize( outH.push_back(imageSize(
inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_));
outW.push_back(imageSize( outW.push_back(
inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); imageSize(inW[i], filterSize, padding_[i], stride_[i], caffeMode_));
} else { } else {
if (inH[i] == 0) if (inH[i] == 0)
inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
if (inW[i] == 0) inW[i] = conf.img_size(); if (inW[i] == 0) inW[i] = conf.img_size();
outH.push_back(outputSize( outH.push_back(outputSize(
inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_));
outW.push_back(outputSize( outW.push_back(outputSize(
inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); inW[i], filterSize, padding_[i], stride_[i], caffeMode_));
} }
CHECK_EQ(outH[i], outH[0]); CHECK_EQ(outH[i], outH[0]);
CHECK_EQ(outW[i], outW[0]); CHECK_EQ(outW[i], outW[0]);
......
...@@ -40,6 +40,10 @@ protected: ...@@ -40,6 +40,10 @@ protected:
IntV stride_; IntV stride_;
/// The y dimension of the stride. /// The y dimension of the stride.
IntV strideY_; IntV strideY_;
/// The x dimension of the dilation.
IntV dilation_;
/// The y dimension of the dilation.
IntV dilationY_;
/// The x dimension of a filter kernel. /// The x dimension of a filter kernel.
IntV filterSize_; IntV filterSize_;
/// The y dimension of a filter kernel. /// The y dimension of a filter kernel.
......
...@@ -59,7 +59,8 @@ void ConvBaseOperator::allocConvWorkSpace() { ...@@ -59,7 +59,8 @@ void ConvBaseOperator::allocConvWorkSpace() {
&bwdDataAlgo_, &bwdDataAlgo_,
&bwdDataLimitBytes_, &bwdDataLimitBytes_,
&bwdFilterAlgo_, &bwdFilterAlgo_,
&bwdFilterLimitBytes_); &bwdFilterLimitBytes_,
/*useDilation*/ false);
size_t maxWorkSpace = 0; size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
......
...@@ -41,6 +41,11 @@ void ConvBaseProjection::getConvParams() { ...@@ -41,6 +41,11 @@ void ConvBaseProjection::getConvParams() {
strideH_ = conf.stride_y(); strideH_ = conf.stride_y();
strideW_ = conf.stride(); strideW_ = conf.stride();
dilationH_ = conf.dilation_y();
dilationW_ = conf.dilation();
CHECK_GT(dilationH_, 0);
CHECK_GT(dilationW_, 0);
filterH_ = conf.filter_size_y(); filterH_ = conf.filter_size_y();
filterW_ = conf.filter_size(); filterW_ = conf.filter_size();
...@@ -77,7 +82,9 @@ void ConvBaseProjection::initCudnn() { ...@@ -77,7 +82,9 @@ void ConvBaseProjection::initCudnn() {
paddingH_, paddingH_,
paddingW_, paddingW_,
strideH_, strideH_,
strideW_); strideW_,
dilationH_,
dilationW_);
// initialize all to default algorithms // initialize all to default algorithms
fwdAlgo_ = 0; fwdAlgo_ = 0;
...@@ -131,7 +138,9 @@ void ConvBaseProjection::reshapeTensorDesc(int batchSize) { ...@@ -131,7 +138,9 @@ void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
paddingH_, paddingH_,
paddingW_, paddingW_,
strideH_, strideH_,
strideW_); strideW_,
dilationH_,
dilationW_);
} }
void ConvBaseProjection::reshape(int batchSize) { void ConvBaseProjection::reshape(int batchSize) {
...@@ -140,6 +149,10 @@ void ConvBaseProjection::reshape(int batchSize) { ...@@ -140,6 +149,10 @@ void ConvBaseProjection::reshape(int batchSize) {
CHECK_EQ(calInputSize(), in_->value->getWidth()); CHECK_EQ(calInputSize(), in_->value->getWidth());
reshapeTensorDesc(batchSize); reshapeTensorDesc(batchSize);
bool useDilation = false;
if (dilationH_ > 1 || dilationW_ > 1) {
useDilation = true;
}
hl_conv_workspace(imageDesc_, hl_conv_workspace(imageDesc_,
outputDesc_, outputDesc_,
filterDesc_, filterDesc_,
...@@ -149,7 +162,8 @@ void ConvBaseProjection::reshape(int batchSize) { ...@@ -149,7 +162,8 @@ void ConvBaseProjection::reshape(int batchSize) {
&bwdDataAlgo_, &bwdDataAlgo_,
&bwdDataLimitBytes_, &bwdDataLimitBytes_,
&bwdFilterAlgo_, &bwdFilterAlgo_,
&bwdFilterLimitBytes_); &bwdFilterLimitBytes_,
useDilation);
size_t maxWorkSpace = 0; size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
......
...@@ -63,6 +63,7 @@ protected: ...@@ -63,6 +63,7 @@ protected:
int configChannels_, configNumFilters_; int configChannels_, configNumFilters_;
int paddingH_, paddingW_; int paddingH_, paddingW_;
int strideH_, strideW_; int strideH_, strideW_;
int dilationH_, dilationW_;
int filterH_, filterW_; int filterH_, filterW_;
/// One group offset of input data. /// One group offset of input data.
int inputOffset_; int inputOffset_;
......
...@@ -25,12 +25,12 @@ size_t ConvProjection::calOutputSize() { ...@@ -25,12 +25,12 @@ size_t ConvProjection::calOutputSize() {
if (imageH_ == 0) imageH_ = configImgH_; if (imageH_ == 0) imageH_ = configImgH_;
if (imageW_ == 0) imageW_ = configImgW_; if (imageW_ == 0) imageW_ = configImgW_;
outputH_ = outputSize(imageH_, outputH_ = outputSize(imageH_,
filterH_, (filterH_ - 1) * dilationH_ + 1,
paddingH_, paddingH_,
strideH_, strideH_,
/* caffeMode */ true); /* caffeMode */ true);
outputW_ = outputSize(imageW_, outputW_ = outputSize(imageW_,
filterW_, (filterW_ - 1) * dilationW_ + 1,
paddingW_, paddingW_,
strideW_, strideW_,
/* caffeMode */ true); /* caffeMode */ true);
......
...@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef PADDLE_ONLY_CPU
#include <cudnn.h>
#endif
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -189,10 +192,16 @@ TEST(Projection, scaling) { ...@@ -189,10 +192,16 @@ TEST(Projection, scaling) {
void testProjectionConv(size_t groups, bool isDeconv) { void testProjectionConv(size_t groups, bool isDeconv) {
const int NUM_FILTERS = 18; const int NUM_FILTERS = 18;
const int FILTER_SIZE = 2; const int FILTER_SIZE = 2;
const int FILTER_SIZE_Y = 4; const int FILTER_SIZE_Y = 2;
const int CHANNELS = 3; const int CHANNELS = 3;
const int IMAGE_SIZE = 16; const int IMAGE_SIZE = 16;
#if CUDNN_VERSION >= 6000
const int DILATION = 2;
#else
const int DILATION = 1;
#endif
ProjectionConfig conf; ProjectionConfig conf;
if (isDeconv) { if (isDeconv) {
conf.set_type("convt"); conf.set_type("convt");
...@@ -209,6 +218,8 @@ void testProjectionConv(size_t groups, bool isDeconv) { ...@@ -209,6 +218,8 @@ void testProjectionConv(size_t groups, bool isDeconv) {
conv->set_padding_y(1); conv->set_padding_y(1);
conv->set_stride(2); conv->set_stride(2);
conv->set_stride_y(2); conv->set_stride_y(2);
conv->set_dilation(DILATION);
conv->set_dilation_y(DILATION);
conv->set_groups(groups); conv->set_groups(groups);
if (isDeconv) { if (isDeconv) {
conv->set_filter_channels(NUM_FILTERS / conv->groups()); conv->set_filter_channels(NUM_FILTERS / conv->groups());
...@@ -217,12 +228,12 @@ void testProjectionConv(size_t groups, bool isDeconv) { ...@@ -217,12 +228,12 @@ void testProjectionConv(size_t groups, bool isDeconv) {
} }
conv->set_img_size(IMAGE_SIZE); conv->set_img_size(IMAGE_SIZE);
int output_x = outputSize(conv->img_size(), int output_x = outputSize(conv->img_size(),
conv->filter_size(), (conv->filter_size() - 1) * DILATION + 1,
conv->padding(), conv->padding(),
conv->stride(), conv->stride(),
/* caffeMode */ true); /* caffeMode */ true);
int output_y = outputSize(conv->img_size(), int output_y = outputSize(conv->img_size(),
conv->filter_size_y(), (conv->filter_size_y() - 1) * DILATION + 1,
conv->padding_y(), conv->padding_y(),
conv->stride_y(), conv->stride_y(),
/* caffeMode */ true); /* caffeMode */ true);
...@@ -424,27 +435,38 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { ...@@ -424,27 +435,38 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1); config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true); config.layerConfig.set_shared_biases(true);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); int dilation = 1;
if (type == "cudnn_conv") {
#if CUDNN_VERSION >= 6000
dilation = 2;
#else
dilation = 1;
#endif
}
config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 192});
LayerInputConfig* input = config.layerConfig.add_inputs(); LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf(); ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2); conv->set_filter_size(2);
conv->set_filter_size_y(3); conv->set_filter_size_y(2);
conv->set_channels(3); conv->set_channels(3);
conv->set_padding(0); conv->set_padding(0);
conv->set_padding_y(1); conv->set_padding_y(1);
conv->set_stride(2); conv->set_stride(2);
conv->set_stride_y(2); conv->set_stride_y(2);
conv->set_dilation(dilation);
conv->set_dilation_y(dilation);
conv->set_groups(1); conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(16); conv->set_img_size(16);
conv->set_img_size_y(8); conv->set_img_size_y(16);
conv->set_output_x(outputSize(conv->img_size(), conv->set_output_x(outputSize(conv->img_size(),
conv->filter_size(), (conv->filter_size() - 1) * dilation + 1,
conv->padding(), conv->padding(),
conv->stride(), conv->stride(),
/* caffeMode */ true)); /* caffeMode */ true));
conv->set_output_y(outputSize(conv->img_size_y(), conv->set_output_y(outputSize(conv->img_size_y(),
conv->filter_size_y(), (conv->filter_size_y() - 1) * dilation + 1,
conv->padding_y(), conv->padding_y(),
conv->stride_y(), conv->stride_y(),
/* caffeMode */ true)); /* caffeMode */ true));
......
...@@ -82,6 +82,9 @@ message ConvConfig { ...@@ -82,6 +82,9 @@ message ConvConfig {
// if not set, use img_size // if not set, use img_size
optional uint32 img_size_y = 14; optional uint32 img_size_y = 14;
optional uint32 dilation = 15 [ default = 1 ];
optional uint32 dilation_y = 16 [ default = 1 ];
} }
message PoolConfig { message PoolConfig {
......
...@@ -870,12 +870,16 @@ class Conv(Cfg): ...@@ -870,12 +870,16 @@ class Conv(Cfg):
caffe_mode=True, caffe_mode=True,
filter_size_y=None, filter_size_y=None,
padding_y=None, padding_y=None,
stride_y=None): stride_y=None,
dilation=None,
dilation_y=None):
self.add_keys(locals()) self.add_keys(locals())
if filter_size_y is None: if filter_size_y is None:
self.filter_size_y = filter_size self.filter_size_y = filter_size
if padding_y is None: if padding_y is None:
self.padding_y = padding self.padding_y = padding
if dilation_y is None:
self.dilation_y = dilation
if stride_y is None: if stride_y is None:
self.stride_y = stride self.stride_y = stride
if output_x is not None: if output_x is not None:
......
...@@ -2342,6 +2342,7 @@ def img_conv_layer(input, ...@@ -2342,6 +2342,7 @@ def img_conv_layer(input,
groups=1, groups=1,
stride=1, stride=1,
padding=0, padding=0,
dilation=1,
bias_attr=None, bias_attr=None,
param_attr=None, param_attr=None,
shared_biases=True, shared_biases=True,
...@@ -2349,6 +2350,7 @@ def img_conv_layer(input, ...@@ -2349,6 +2350,7 @@ def img_conv_layer(input,
filter_size_y=None, filter_size_y=None,
stride_y=None, stride_y=None,
padding_y=None, padding_y=None,
dilation_y=None,
trans=False, trans=False,
layer_type=None): layer_type=None):
""" """
...@@ -2413,6 +2415,11 @@ def img_conv_layer(input, ...@@ -2413,6 +2415,11 @@ def img_conv_layer(input,
:type padding: int|tuple|list :type padding: int|tuple|list
:param padding_y: The y dimension of the padding. :param padding_y: The y dimension of the padding.
:type padding_y: int :type padding_y: int
:param dilation: The x dimension of the dilation. Or input a tuple for two
image dimension
:type dilation: int|tuple|list
:param dilation_y: The y dimension of the dilation.
:type dilation_y: int
:param bias_attr: Convolution bias attribute. None means default bias. :param bias_attr: Convolution bias attribute. None means default bias.
False means no bias. False means no bias.
:type bias_attr: ParameterAttribute|False :type bias_attr: ParameterAttribute|False
...@@ -2460,6 +2467,13 @@ def img_conv_layer(input, ...@@ -2460,6 +2467,13 @@ def img_conv_layer(input,
else: else:
padding_y = padding padding_y = padding
if dilation_y is None:
if isinstance(dilation, collections.Sequence):
assert len(dilation) == 2
dilation, dilation_y = dilation
else:
dilation_y = dilation
if param_attr.attr.get('initial_smart'): if param_attr.attr.get('initial_smart'):
# special initial for conv layers. # special initial for conv layers.
init_w = (2.0 / (filter_size**2 * num_channels))**0.5 init_w = (2.0 / (filter_size**2 * num_channels))**0.5
...@@ -2469,6 +2483,8 @@ def img_conv_layer(input, ...@@ -2469,6 +2483,8 @@ def img_conv_layer(input,
param_attr.attr["initial_smart"] = False param_attr.attr["initial_smart"] = False
if layer_type: if layer_type:
if dilation > 1 or dilation_y > 1:
assert layer_type in ["cudnn_conv", "cudnn_convt"]
if trans: if trans:
assert layer_type in ["exconvt", "cudnn_convt"] assert layer_type in ["exconvt", "cudnn_convt"]
else: else:
...@@ -2484,11 +2500,13 @@ def img_conv_layer(input, ...@@ -2484,11 +2500,13 @@ def img_conv_layer(input,
conv=Conv( conv=Conv(
filter_size=filter_size, filter_size=filter_size,
padding=padding, padding=padding,
dilation=dilation,
stride=stride, stride=stride,
channels=num_channels, channels=num_channels,
groups=groups, groups=groups,
filter_size_y=filter_size_y, filter_size_y=filter_size_y,
padding_y=padding_y, padding_y=padding_y,
dilation_y=dilation_y,
stride_y=stride_y), stride_y=stride_y),
**param_attr.attr), **param_attr.attr),
active_type=act.name, active_type=act.name,
......
...@@ -12,6 +12,7 @@ img_conv = img_conv_layer( ...@@ -12,6 +12,7 @@ img_conv = img_conv_layer(
num_filters=64, num_filters=64,
filter_size=(32, 32), filter_size=(32, 32),
padding=(1, 1), padding=(1, 1),
dilation=(1, 1),
stride=(1, 1), stride=(1, 1),
act=LinearActivation()) act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册