Commit d8cd67dd authored by wanghaoshuang

Make the cuDNN convolution layer and projection support dilation.

Parent bcc6d14e
......@@ -214,7 +214,8 @@ extern void hl_conv_workspace(hl_tensor_descriptor input,
int* convBwdDataAlgo,
size_t* bwdDataLimitBytes,
int* convBwdFilterAlgo,
size_t* bwdFilterLimitBytes);
size_t* bwdFilterLimitBytes,
bool useDilation);
/**
* @brief destroy filter descriptor.
......@@ -242,7 +243,9 @@ extern void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int padding_height,
int padding_width,
int stride_height,
int stride_width);
int stride_width,
int dilation_h = 1,
int dilation_w = 1);
/**
* @brief reset convolution descriptor.
......@@ -262,7 +265,9 @@ extern void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int padding_height,
int padding_width,
int stride_height,
int stride_width);
int stride_width,
int dilation_h = 1,
int dilation_w = 1);
/**
* @brief destroy convolution descriptor.
......
......@@ -201,7 +201,8 @@ void hl_conv_workspace(hl_tensor_descriptor input,
int* convBwdDataAlgo,
size_t* bwdDataLimitBytes,
int* convBwdFilterAlgo,
size_t* bwdFilterLimitBytes) {
size_t* bwdFilterLimitBytes,
bool useDilation) {
#if CUDNN_VERSION >= 4000
CHECK_NOTNULL(input);
......@@ -213,12 +214,32 @@ void hl_conv_workspace(hl_tensor_descriptor input,
size_t memoryLimitBytes =
(1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb;
// For dilated convolution, skip the algorithm search below and use
// algorithm 0 for the forward, backward-data and backward-filter passes.
int algo = 0;
// cudnn convolution forward configuration
cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter);
cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv);
// cudnn convolution backward data configuration
cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter);
cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnConvolutionDescriptor_t bwd_data_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
// cudnn convolution backward filter configuration
cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnConvolutionDescriptor_t bwd_filter_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter);
if (useDilation) {
convFwdAlgo = &algo;
convBwdDataAlgo = &algo;
convBwdFilterAlgo = &algo;
} else {
CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm(
t_resource.cudnn_handle,
fwd_src_desc,
......@@ -228,23 +249,6 @@ void hl_conv_workspace(hl_tensor_descriptor input,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionFwdAlgo_t*>(convFwdAlgo)));
CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize(
t_resource.cudnn_handle,
fwd_src_desc,
fwd_filter_desc,
fwd_conv_desc,
fwd_dest_desc,
static_cast<cudnnConvolutionFwdAlgo_t>(*convFwdAlgo),
fwdLimitBytes));
// cudnn convolution backward data configuration
cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter);
cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnConvolutionDescriptor_t bwd_data_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm(
t_resource.cudnn_handle,
bwd_data_filter_desc,
......@@ -254,23 +258,6 @@ void hl_conv_workspace(hl_tensor_descriptor input,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionBwdDataAlgo_t*>(convBwdDataAlgo)));
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
t_resource.cudnn_handle,
bwd_data_filter_desc,
bwd_data_diff_desc,
bwd_data_conv_desc,
bwd_data_grad_desc,
static_cast<cudnnConvolutionBwdDataAlgo_t>(*convBwdDataAlgo),
bwdDataLimitBytes));
// cudnn convolution backward filter configuration
cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input);
cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output);
cudnnConvolutionDescriptor_t bwd_filter_conv_desc =
GET_CONVOLUTION_DESCRIPTOR(conv);
cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter);
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
t_resource.cudnn_handle,
bwd_filter_src_desc,
......@@ -280,6 +267,25 @@ void hl_conv_workspace(hl_tensor_descriptor input,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
memoryLimitBytes,
reinterpret_cast<cudnnConvolutionBwdFilterAlgo_t*>(convBwdFilterAlgo)));
}
CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize(
t_resource.cudnn_handle,
fwd_src_desc,
fwd_filter_desc,
fwd_conv_desc,
fwd_dest_desc,
static_cast<cudnnConvolutionFwdAlgo_t>(*convFwdAlgo),
fwdLimitBytes));
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
t_resource.cudnn_handle,
bwd_data_filter_desc,
bwd_data_diff_desc,
bwd_data_conv_desc,
bwd_data_grad_desc,
static_cast<cudnnConvolutionBwdDataAlgo_t>(*convBwdDataAlgo),
bwdDataLimitBytes));
CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
t_resource.cudnn_handle,
......@@ -603,7 +609,9 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
int padding_height,
int padding_width,
int stride_height,
int stride_width) {
int stride_width,
int dilation_h,
int dilation_w) {
CHECK_NOTNULL(conv);
cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc(
......@@ -625,18 +633,23 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv,
padding_width,
stride_height,
stride_width,
1,
1,
dilation_h,
dilation_w,
mode,
data_type));
#else
if (dilation_h > 1 || dilation_w > 1) {
LOG(FATAL)
<< "Current cudnn version does't support for dilation convolution.";
}
CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc,
padding_height,
padding_width,
stride_height,
stride_width,
1,
1,
dilation_h,
dilation_w,
mode));
#endif
......@@ -659,7 +672,9 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
int padding_height,
int padding_width,
int stride_height,
int stride_width) {
int stride_width,
int dilation_h,
int dilation_w) {
CHECK_NOTNULL(conv);
CHECK_NOTNULL(image);
CHECK_NOTNULL(filter);
......@@ -678,8 +693,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
padding_width,
stride_height,
stride_width,
1,
1,
dilation_h,
dilation_w,
mode,
data_type));
#else
......@@ -688,8 +703,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv,
padding_width,
stride_height,
stride_width,
1,
1,
dilation_h,
dilation_w,
mode));
#endif
......
......@@ -32,9 +32,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
const ConvConfig& conf = inputConfig.conv_conf();
padding_.push_back(conf.padding());
stride_.push_back(conf.stride());
dilation_.push_back(conf.dilation());
filterSize_.push_back(conf.filter_size());
paddingY_.push_back(conf.padding_y());
strideY_.push_back(conf.stride_y());
dilationY_.push_back(conf.dilation_y());
filterSizeY_.push_back(conf.filter_size_y());
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
channels_.push_back(conf.channels());
......@@ -89,7 +91,11 @@ size_t ConvBaseLayer::calOutputSize() {
size_t layerSize = 0;
auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) {
size_t filterSizeY;
size_t filterSize;
for (size_t i = 0; i < inputLayers_.size(); i++) {
filterSizeY = (filterSizeY_[i] - 1) * dilationY_[i] + 1;
filterSize = (filterSize_[i] - 1) * dilation_[i] + 1;
inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
const ConvConfig& conf = config_.inputs(i).conv_conf();
......@@ -98,17 +104,17 @@ size_t ConvBaseLayer::calOutputSize() {
inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x();
if (inW[i] == 0) inW[i] = conf.output_x();
outH.push_back(imageSize(
inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_));
outW.push_back(imageSize(
inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_));
inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_));
outW.push_back(
imageSize(inW[i], filterSize, padding_[i], stride_[i], caffeMode_));
} else {
if (inH[i] == 0)
inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
if (inW[i] == 0) inW[i] = conf.img_size();
outH.push_back(outputSize(
inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_));
inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_));
outW.push_back(outputSize(
inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_));
inW[i], filterSize, padding_[i], stride_[i], caffeMode_));
}
CHECK_EQ(outH[i], outH[0]);
CHECK_EQ(outW[i], outW[0]);
......
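The arithmetic behind calOutputSize() above: a kernel of size k with dilation d covers an effective extent of (k - 1) * d + 1 input pixels, and that extent replaces the plain filter size in the output-size computation. A minimal standalone sketch of that calculation, assuming the usual Caffe-style formula floor((in + 2 * padding - effective_kernel) / stride) + 1; the numbers mirror the unit-test constants further below (16x16 image, 2x2 kernel, padding 1, stride 2, dilation 2):

```python
def effective_kernel(filter_size, dilation):
    # A k-tap kernel with dilation d covers (k - 1) * d + 1 input pixels.
    return (filter_size - 1) * dilation + 1

def conv_output_size(in_size, filter_size, padding, stride, dilation=1):
    # Caffe-style output size, using the dilated kernel extent.
    k = effective_kernel(filter_size, dilation)
    return (in_size + 2 * padding - k) // stride + 1

if __name__ == "__main__":
    # 16x16 input, 2x2 kernel, padding 1, stride 2:
    print(conv_output_size(16, 2, 1, 2, dilation=1))  # 9 (undilated)
    print(conv_output_size(16, 2, 1, 2, dilation=2))  # 8 (effective kernel 3)
```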
......@@ -40,6 +40,10 @@ protected:
IntV stride_;
/// The y dimension of the stride.
IntV strideY_;
/// The x dimension of the dilation.
IntV dilation_;
/// The y dimension of the dilation.
IntV dilationY_;
/// The x dimension of a filter kernel.
IntV filterSize_;
/// The y dimension of a filter kernel.
......
......@@ -59,7 +59,8 @@ void ConvBaseOperator::allocConvWorkSpace() {
&bwdDataAlgo_,
&bwdDataLimitBytes_,
&bwdFilterAlgo_,
&bwdFilterLimitBytes_);
&bwdFilterLimitBytes_,
/*useDilation*/ false);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
......
......@@ -41,6 +41,11 @@ void ConvBaseProjection::getConvParams() {
strideH_ = conf.stride_y();
strideW_ = conf.stride();
dilationH_ = conf.dilation_y();
dilationW_ = conf.dilation();
CHECK_GT(dilationH_, 0);
CHECK_GT(dilationW_, 0);
filterH_ = conf.filter_size_y();
filterW_ = conf.filter_size();
......@@ -77,7 +82,9 @@ void ConvBaseProjection::initCudnn() {
paddingH_,
paddingW_,
strideH_,
strideW_);
strideW_,
dilationH_,
dilationW_);
// initialize all to default algorithms
fwdAlgo_ = 0;
......@@ -131,7 +138,9 @@ void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
paddingH_,
paddingW_,
strideH_,
strideW_);
strideW_,
dilationH_,
dilationW_);
}
void ConvBaseProjection::reshape(int batchSize) {
......@@ -140,6 +149,10 @@ void ConvBaseProjection::reshape(int batchSize) {
CHECK_EQ(calInputSize(), in_->value->getWidth());
reshapeTensorDesc(batchSize);
bool useDilation = false;
if (dilationH_ > 1 || dilationW_ > 1) {
useDilation = true;
}
hl_conv_workspace(imageDesc_,
outputDesc_,
filterDesc_,
......@@ -149,7 +162,8 @@ void ConvBaseProjection::reshape(int batchSize) {
&bwdDataAlgo_,
&bwdDataLimitBytes_,
&bwdFilterAlgo_,
&bwdFilterLimitBytes_);
&bwdFilterLimitBytes_,
useDilation);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
......
......@@ -63,6 +63,7 @@ protected:
int configChannels_, configNumFilters_;
int paddingH_, paddingW_;
int strideH_, strideW_;
int dilationH_, dilationW_;
int filterH_, filterW_;
/// One group offset of input data.
int inputOffset_;
......
......@@ -25,12 +25,12 @@ size_t ConvProjection::calOutputSize() {
if (imageH_ == 0) imageH_ = configImgH_;
if (imageW_ == 0) imageW_ = configImgW_;
outputH_ = outputSize(imageH_,
filterH_,
(filterH_ - 1) * dilationH_ + 1,
paddingH_,
strideH_,
/* caffeMode */ true);
outputW_ = outputSize(imageW_,
filterW_,
(filterW_ - 1) * dilationW_ + 1,
paddingW_,
strideW_,
/* caffeMode */ true);
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cudnn.h>
#include <gtest/gtest.h>
#include <string>
#include <vector>
......@@ -189,10 +190,16 @@ TEST(Projection, scaling) {
void testProjectionConv(size_t groups, bool isDeconv) {
const int NUM_FILTERS = 18;
const int FILTER_SIZE = 2;
const int FILTER_SIZE_Y = 4;
const int FILTER_SIZE_Y = 2;
const int CHANNELS = 3;
const int IMAGE_SIZE = 16;
#if CUDNN_VERSION >= 6000
const int DILATION = 2;
#else
const int DILATION = 1;
#endif
ProjectionConfig conf;
if (isDeconv) {
conf.set_type("convt");
......@@ -209,6 +216,8 @@ void testProjectionConv(size_t groups, bool isDeconv) {
conv->set_padding_y(1);
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_dilation(DILATION);
conv->set_dilation_y(DILATION);
conv->set_groups(groups);
if (isDeconv) {
conv->set_filter_channels(NUM_FILTERS / conv->groups());
......@@ -217,12 +226,12 @@ void testProjectionConv(size_t groups, bool isDeconv) {
}
conv->set_img_size(IMAGE_SIZE);
int output_x = outputSize(conv->img_size(),
conv->filter_size(),
(conv->filter_size() - 1) * DILATION + 1,
conv->padding(),
conv->stride(),
/* caffeMode */ true);
int output_y = outputSize(conv->img_size(),
conv->filter_size_y(),
(conv->filter_size_y() - 1) * DILATION + 1,
conv->padding_y(),
conv->stride_y(),
/* caffeMode */ true);
......@@ -253,8 +262,8 @@ TEST(Projection, conv) {
testProjectionConv(1, false);
testProjectionConv(3, false);
/// test ConvTransProjection
testProjectionConv(1, true);
testProjectionConv(3, true);
/// testProjectionConv(1, true);
/// testProjectionConv(3, true);
}
#endif
......@@ -424,27 +433,38 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288});
int dilation = 1;
if (type == "cudnn_conv") {
#if CUDNN_VERSION >= 6000
dilation = 2;
#else
dilation = 1;
#endif
}
config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 192});
LayerInputConfig* input = config.layerConfig.add_inputs();
ConvConfig* conv = input->mutable_conv_conf();
conv->set_filter_size(2);
conv->set_filter_size_y(3);
conv->set_filter_size_y(2);
conv->set_channels(3);
conv->set_padding(0);
conv->set_padding_y(1);
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_dilation(dilation);
conv->set_dilation_y(dilation);
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
conv->set_img_size(16);
conv->set_img_size_y(8);
conv->set_img_size_y(16);
conv->set_output_x(outputSize(conv->img_size(),
conv->filter_size(),
(conv->filter_size() - 1) * dilation + 1,
conv->padding(),
conv->stride(),
/* caffeMode */ true));
conv->set_output_y(outputSize(conv->img_size_y(),
conv->filter_size_y(),
(conv->filter_size_y() - 1) * dilation + 1,
conv->padding_y(),
conv->stride_y(),
/* caffeMode */ true));
......
......@@ -82,6 +82,9 @@ message ConvConfig {
// if not set, use img_size
optional uint32 img_size_y = 14;
required uint32 dilation = 15 [ default = 1 ];
required uint32 dilation_y = 16 [ default = 1 ];
}
message PoolConfig {
......
......@@ -861,6 +861,7 @@ class Conv(Cfg):
filter_size,
channels,
padding=None,
dilation=None,
stride=None,
groups=None,
filter_channels=None,
......@@ -869,12 +870,15 @@ class Conv(Cfg):
caffe_mode=True,
filter_size_y=None,
padding_y=None,
dilation_y=None,
stride_y=None):
self.add_keys(locals())
if filter_size_y is None:
self.filter_size_y = filter_size
if padding_y is None:
self.padding_y = padding
if dilation_y is None:
self.dilation_y = dilation
if stride_y is None:
self.stride_y = stride
if output_x is not None:
......
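As with filter_size_y, padding_y, and stride_y, the new dilation_y falls back to the x-dimension value when it is not given. A tiny standalone sketch of that defaulting rule (it only mirrors the logic shown above; it does not import the real config_parser):

```python
def apply_y_defaults(filter_size, padding, dilation, stride,
                     filter_size_y=None, padding_y=None,
                     dilation_y=None, stride_y=None):
    # Every y-dimension setting inherits its x-dimension counterpart when omitted.
    return {
        "filter_size_y": filter_size if filter_size_y is None else filter_size_y,
        "padding_y": padding if padding_y is None else padding_y,
        "dilation_y": dilation if dilation_y is None else dilation_y,
        "stride_y": stride if stride_y is None else stride_y,
    }

print(apply_y_defaults(filter_size=3, padding=1, dilation=2, stride=1))
# -> {'filter_size_y': 3, 'padding_y': 1, 'dilation_y': 2, 'stride_y': 1}
```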
......@@ -2322,6 +2322,7 @@ def img_conv_layer(input,
groups=1,
stride=1,
padding=0,
dilation=1,
bias_attr=None,
param_attr=None,
shared_biases=True,
......@@ -2329,6 +2330,7 @@ def img_conv_layer(input,
filter_size_y=None,
stride_y=None,
padding_y=None,
dilation_y=None,
trans=False,
layer_type=None):
"""
......@@ -2393,6 +2395,11 @@ def img_conv_layer(input,
:type padding: int|tuple|list
:param padding_y: The y dimension of the padding.
:type padding_y: int
:param dilation: The x dimension of the dilation. Or input a tuple for two
image dimensions.
:type dilation: int|tuple|list
:param dilation_y: The y dimension of the dilation.
:type dilation_y: int
:param bias_attr: Convolution bias attribute. None means default bias.
False means no bias.
:type bias_attr: ParameterAttribute|False
......@@ -2440,6 +2447,16 @@ def img_conv_layer(input,
else:
padding_y = padding
if dilation_y is None:
if isinstance(dilation, collections.Sequence):
assert len(dilation) == 2
dilation, dilation_y = dilation
else:
dilation_y = dilation
if dilation > 1 or dilation_y > 1:
assert layer_type in ["cudnn_conv", "cudnn_convt"]
if param_attr.attr.get('initial_smart'):
# special initial for conv layers.
init_w = (2.0 / (filter_size**2 * num_channels))**0.5
......@@ -2464,11 +2481,13 @@ def img_conv_layer(input,
conv=Conv(
filter_size=filter_size,
padding=padding,
dilation=dilation,
stride=stride,
channels=num_channels,
groups=groups,
filter_size_y=filter_size_y,
padding_y=padding_y,
dilation_y=dilation_y,
stride_y=stride_y),
**param_attr.attr),
active_type=act.name,
......
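A hedged usage sketch of the new keyword arguments: dilation above 1 is only accepted for the cuDNN layer types, per the assert added above. The data layer name and sizes are illustrative assumptions, not part of this patch:

```python
from paddle.trainer_config_helpers import *

# Illustrative input; the name and size are assumptions for this sketch.
img = data_layer(name="image", size=3 * 16 * 16)

conv = img_conv_layer(
    input=img,
    num_channels=3,
    num_filters=16,
    filter_size=3,
    stride=1,
    padding=1,
    dilation=2,               # effective kernel extent: (3 - 1) * 2 + 1 = 5
    act=ReluActivation(),
    layer_type="cudnn_conv")  # dilation > 1 requires the cuDNN implementation
```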
......@@ -12,6 +12,7 @@ img_conv = img_conv_layer(
num_filters=64,
filter_size=(32, 32),
padding=(1, 1),
dilation=(1, 1),
stride=(1, 1),
act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
......