From b8afb140984a39ca577c00d2b580fe6ddce15c9a Mon Sep 17 00:00:00 2001 From: wangyang59 Date: Wed, 22 Feb 2017 12:09:41 -0800 Subject: [PATCH] cudnn deconv implementation --- paddle/gserver/CMakeLists.txt | 4 + paddle/gserver/layers/ConvBaseProjection.cpp | 204 ++++++++++++++++++ paddle/gserver/layers/ConvBaseProjection.h | 162 ++++++++++++++ paddle/gserver/layers/ConvProjection.cpp | 173 +-------------- paddle/gserver/layers/ConvProjection.h | 99 +-------- paddle/gserver/layers/ConvTransProjection.cpp | 95 ++++++++ paddle/gserver/layers/ConvTransProjection.h | 41 ++++ paddle/gserver/layers/CudnnConvBaseLayer.cpp | 121 +++++++++++ paddle/gserver/layers/CudnnConvBaseLayer.h | 53 +++++ paddle/gserver/layers/CudnnConvLayer.cpp | 93 -------- paddle/gserver/layers/CudnnConvLayer.h | 25 +-- paddle/gserver/layers/CudnnConvTransLayer.cpp | 23 ++ paddle/gserver/layers/CudnnConvTransLayer.h | 41 ++++ paddle/gserver/tests/test_ConvUnify.cpp | 3 +- paddle/gserver/tests/test_LayerGrad.cpp | 41 +++- 15 files changed, 789 insertions(+), 389 deletions(-) create mode 100644 paddle/gserver/layers/ConvBaseProjection.cpp create mode 100644 paddle/gserver/layers/ConvBaseProjection.h create mode 100644 paddle/gserver/layers/ConvTransProjection.cpp create mode 100644 paddle/gserver/layers/ConvTransProjection.h create mode 100644 paddle/gserver/layers/CudnnConvBaseLayer.cpp create mode 100644 paddle/gserver/layers/CudnnConvBaseLayer.h create mode 100644 paddle/gserver/layers/CudnnConvTransLayer.cpp create mode 100644 paddle/gserver/layers/CudnnConvTransLayer.h diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 4f92150ec8..93a6a99848 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -25,12 +25,16 @@ filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER + layers/CudnnConvBaseLayer.h layers/CudnnConvLayer.h + layers/CudnnConvTransLayer.h layers/CudnnPoolLayer.h 
layers/CudnnBatchNormLayer.h) list(REMOVE_ITEM GSERVER_SOURCES + layers/CudnnConvBaseLayer.cpp layers/CudnnConvLayer.cpp + layers/CudnnConvTransLayer.cpp layers/CudnnPoolLayer.cpp layers/CudnnBatchNormLayer.cpp) compile_cu_as_cpp(layers/LstmCompute.cu) diff --git a/paddle/gserver/layers/ConvBaseProjection.cpp b/paddle/gserver/layers/ConvBaseProjection.cpp new file mode 100644 index 0000000000..808f848750 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseProjection.cpp @@ -0,0 +1,204 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ConvBaseProjection.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +ThreadLocalD> ConvBaseProjection::convMem_; + +ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config, + ParameterPtr parameter, + bool useGpu) + : Projection(config, parameter, useGpu) { + CHECK(useGpu); // only support GPU + getConvParams(); + initCudnn(); + + size_t height = filterH_ * filterW_ * channels_ / groups_; + size_t width = numFilters_; + weight_.reset(new Weight(height, width, parameter)); + weightOffset_ = height * width / groups_; +} + +void ConvBaseProjection::getConvParams() { + const ConvConfig &conf = config_.conv_conf(); + paddingH_ = conf.padding_y(); + paddingW_ = conf.padding(); + + strideH_ = conf.stride_y(); + strideW_ = conf.stride(); + + filterH_ = conf.filter_size_y(); + filterW_ = conf.filter_size(); + + configImgH_ = conf.has_img_size_y() ? 
conf.img_size_y() : conf.img_size(); + configImgW_ = conf.img_size(); + + configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); + configOutW_ = conf.output_x(); + + configChannels_ = conf.channels(); + configNumFilters_ = config_.num_filters(); + + isDeconv_ = (config_.type() == "conv") ? false : true; + + channels_ = (isDeconv_) ? configNumFilters_ : configChannels_; + numFilters_ = (isDeconv_) ? configChannels_ : configNumFilters_; + + groups_ = conf.groups(); + CHECK_EQ(channels_ % groups_, 0); + CHECK_EQ(numFilters_ % groups_, 0); +} + +void ConvBaseProjection::initCudnn() { + hl_create_filter_descriptor(&filterDesc_, + channels_ / groups_, + numFilters_ / groups_, + filterH_, + filterW_); + hl_create_tensor_descriptor(&imageDesc_); + hl_create_tensor_descriptor(&outputDesc_); + hl_create_convolution_descriptor(&convDesc_, + imageDesc_, + filterDesc_, + paddingH_, + paddingW_, + strideH_, + strideW_); + + // initialize all to default algorithms + fwdAlgo_ = 0; + bwdFilterAlgo_ = 0; + bwdDataAlgo_ = 0; + fwdLimitBytes_ = 0; + bwdDataLimitBytes_ = 0; + bwdFilterLimitBytes_ = 0; + workSpaceInBytes_ = 0; + + batchNum_ = 0; + isSelectAlgo_ = false; +} + +void ConvBaseProjection::reshapeTensorDesc(int batchSize) { + hl_tensor_reshape(imageDesc_, + batchSize, + channels_ / groups_, + imageH_, + imageW_, + channels_ * imageH_ * imageW_, + imageH_ * imageW_, + imageW_, + 1); + hl_reset_convolution_descriptor(convDesc_, + imageDesc_, + filterDesc_, + paddingH_, + paddingW_, + strideH_, + strideW_); + + // The stride between two consecutive images in ConvProjection may not be 1, + // for example, in the case of layer ConcatenateLayer2 with two + // ConvProjection, the stride is the output_size of layer ConcatenateLayer2. + // So the calculation of nStride is different from CudnnConvLayer. + // In fact, only "nStride = out_->value->getStride()" is ok. 
+ // size_t nStride = numFilters_ * outputH_ * outputW_; + // if (out_->value->isContiguous()) { + // CHECK_EQ(nStride, out_->value->getWidth()); + // } else { + // nStride = out_->value->getStride(); + // } + size_t nStride = out_->value->getStride(); + + hl_tensor_reshape(outputDesc_, + batchSize, + numFilters_ / groups_, + outputH_, + outputW_, + nStride, + outputH_ * outputW_, + outputW_, + 1); +} + +void ConvBaseProjection::reshape(int batchSize) { + size_t width = calOutputSize(); + CHECK_EQ(width, out_->value->getWidth()); + if (isDeconv_) { + CHECK_EQ(static_cast(configChannels_ * outputH_ * outputW_), + in_->value->getWidth()) + << "Wrong input size for convolution transpose" + << " channels=" << configChannels_ << " outputH=" << outputH_ + << " outputW=" << outputW_ << " inputSize=" << in_->value->getWidth(); + } else { + CHECK_EQ(static_cast(configChannels_ * imageH_ * imageW_), + in_->value->getWidth()) + << "Wrong input size for convolution" + << " channels=" << configChannels_ << " imageH=" << imageH_ + << " imageW=" << imageW_ << " inputSize=" << in_->value->getWidth(); + } + + isSelectAlgo_ = (batchSize == batchNum_); + batchNum_ = batchSize; + + if (!isSelectAlgo_) { + reshapeTensorDesc(batchSize); + hl_conv_workspace(imageDesc_, + outputDesc_, + filterDesc_, + convDesc_, + &fwdAlgo_, + &fwdLimitBytes_, + &bwdDataAlgo_, + &bwdDataLimitBytes_, + &bwdFilterAlgo_, + &bwdFilterLimitBytes_); + + size_t maxWorkSpace = 0; + maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); + maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); + workSpaceInBytes_ = maxWorkSpace; + + VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_ + << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_; + } + + isSelectAlgo_ = true; +} + +void *ConvBaseProjection::getSpaceBytes(size_t size) { + std::vector &convMem = *convMem_; + if (convMem.empty()) { + int numDevices = hl_get_device_count(); + convMem.resize(numDevices); + } + + int devId = 
hl_get_device(); + MemoryHandle **localMem = &(convMem[devId]); + if (NULL == *localMem || size > (*localMem)->getAllocSize()) { + *localMem = new GpuMemoryHandle(size); + } + return (*localMem)->getBuf(); +} + +ConvBaseProjection::~ConvBaseProjection() { + hl_destroy_tensor_descriptor(imageDesc_); + hl_destroy_tensor_descriptor(outputDesc_); + hl_destroy_filter_descriptor(filterDesc_); + hl_destroy_convolution_descriptor(convDesc_); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvBaseProjection.h b/paddle/gserver/layers/ConvBaseProjection.h new file mode 100644 index 0000000000..d55769a284 --- /dev/null +++ b/paddle/gserver/layers/ConvBaseProjection.h @@ -0,0 +1,162 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Projection.h" +#include "paddle/math/MathUtils.h" + +namespace paddle { + +/** + * @brief Base class for ConvProjection and ConvTransProjection. + */ +class ConvBaseProjection : public Projection { +public: + /** + * Constructor. 
+ */ + ConvBaseProjection(const ProjectionConfig& config, + ParameterPtr parameter, + bool useGpu); + + ~ConvBaseProjection(); + +protected: + void getConvParams(); + void initCudnn(); + + void reshapeTensorDesc(int batchSize); + void reshape(int batchSize); + + size_t calOutputSize() { + if (isDeconv_) { + outputH_ = in_->getFrameHeight(); + outputW_ = in_->getFrameWidth(); + if (outputH_ == 0) outputH_ = configOutH_; + if (outputW_ == 0) outputW_ = configOutW_; + imageH_ = imageSize(outputH_, + filterH_, + paddingH_, + strideH_, + /* caffeMode */ true); + + imageW_ = imageSize(outputW_, + filterW_, + paddingW_, + strideW_, + /* caffeMode */ true); + + const_cast(out_)->setFrameHeight(imageH_); + const_cast(out_)->setFrameWidth(imageW_); + + inputOffset_ = (configChannels_ / groups_) * outputH_ * outputW_; + outputOffset_ = (configNumFilters_ / groups_) * imageH_ * imageW_; + return imageH_ * imageW_ * configNumFilters_; + } else { + imageH_ = in_->getFrameHeight(); + imageW_ = in_->getFrameWidth(); + if (imageH_ == 0) imageH_ = configImgH_; + if (imageW_ == 0) imageW_ = configImgW_; + outputH_ = outputSize(imageH_, + filterH_, + paddingH_, + strideH_, + /* caffeMode */ true); + outputW_ = outputSize(imageW_, + filterW_, + paddingW_, + strideW_, + /* caffeMode */ true); + + const_cast(out_)->setFrameHeight(outputH_); + const_cast(out_)->setFrameWidth(outputW_); + + inputOffset_ = (configChannels_ / groups_) * imageH_ * imageW_; + outputOffset_ = (configNumFilters_ / groups_) * outputH_ * outputW_; + return outputH_ * outputW_ * configNumFilters_; + } + } + + static void* getSpaceBytes(size_t size); + + /// True if it's deconv projection layer, false if it's ConvProjection layer + bool isDeconv_; + /// imageH_ and imageW_ / outputH_ and outputW_ + /// is calculated from the input layer. + int imageH_, imageW_; + int outputH_, outputW_; + /// configImgH_ and configImgW_ / configOutH_ and configOutW_ + /// is obtained from config. 
int configImgH_, configImgW_; + int configOutH_, configOutW_; + /// channels_ and numFilters_ are defined in terms of convolution semantics + int channels_, numFilters_; + /// configChannels_ and configNumFilters_ are obtained from config + /// For Conv they are the same as channels_ and numFilters_ + /// For ConvTrans they are opposite to channels_ and numFilters_ + int configChannels_, configNumFilters_; + int paddingH_, paddingW_; + int strideH_, strideW_; + int filterH_, filterW_; + /// One group offset of input data. + int inputOffset_; + /// One group offset of output data. + int outputOffset_; + /// One group offset of weight. + int weightOffset_; + int groups_; + + /// Cudnn tensor descriptor for input. + hl_tensor_descriptor imageDesc_; + /// Cudnn tensor descriptor for output. + hl_tensor_descriptor outputDesc_; + /// Cudnn tensor descriptor for filter. + hl_filter_descriptor filterDesc_; + /// Cudnn tensor descriptor for a convolution operation. + hl_convolution_descriptor convDesc_; + + /// Record the algorithm for forward convolution, which is obtained by cudnn + /// api to search the best suited algorithm. + int fwdAlgo_; + /// Record the algorithm for computing convolution gradient with respect to + /// filter coefficients. + int bwdFilterAlgo_; + /// Record the algorithm for computing convolution gradient with respect to + /// the input data. + int bwdDataAlgo_; + /// Amount of GPU memory needed as workspace to be able to execute a + /// forward convolution with the specified algo. + size_t fwdLimitBytes_; + /// Amount of GPU memory needed as workspace to be able to execute a + /// backwardData with the specified algo. + size_t bwdDataLimitBytes_; + /// Amount of GPU memory needed as workspace to be able to execute a + /// backwardFilter with the specified algo. + size_t bwdFilterLimitBytes_; + /// Size of total work space. + size_t workSpaceInBytes_; + + /// Whether to call cuDNN api to choose conv algorithm. 
+ bool isSelectAlgo_; + /// batchNum is used to record batch size. If the batch size is changed, + /// the selection algorithm will be called. + int batchNum_; + bool bias_; + + std::unique_ptr weight_; + static ThreadLocalD> convMem_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvProjection.cpp b/paddle/gserver/layers/ConvProjection.cpp index 0281170bc5..e106c238ab 100644 --- a/paddle/gserver/layers/ConvProjection.cpp +++ b/paddle/gserver/layers/ConvProjection.cpp @@ -19,151 +19,6 @@ namespace paddle { REGISTER_PROJECTION(conv, ConvProjection); -ThreadLocalD> ConvProjection::convMem_; - -ConvProjection::ConvProjection(const ProjectionConfig &config, - ParameterPtr parameter, - bool useGpu) - : Projection(config, parameter, useGpu) { - CHECK(useGpu); // only support GPU - getConvParams(); - initCudnn(); - - size_t height = filterH_ * filterW_ * channels_ / groups_; - size_t width = numFilters_; - weight_.reset(new Weight(height, width, parameter)); - weightOffset_ = height * width / groups_; -} - -void ConvProjection::getConvParams() { - const ConvConfig &conf = config_.conv_conf(); - paddingH_ = conf.padding_y(); - paddingW_ = conf.padding(); - - strideH_ = conf.stride_y(); - strideW_ = conf.stride(); - - filterH_ = conf.filter_size_y(); - filterW_ = conf.filter_size(); - - configImgH_ = conf.has_img_size_y() ? 
conf.img_size_y() : conf.img_size(); - configImgW_ = conf.img_size(); - - channels_ = conf.channels(); - numFilters_ = config_.num_filters(); - - groups_ = conf.groups(); - CHECK_EQ(channels_ % groups_, 0); - CHECK_EQ(numFilters_ % groups_, 0); -} - -void ConvProjection::initCudnn() { - hl_create_filter_descriptor(&filterDesc_, - channels_ / groups_, - numFilters_ / groups_, - filterH_, - filterW_); - hl_create_tensor_descriptor(&inputDesc_); - hl_create_tensor_descriptor(&outputDesc_); - hl_create_convolution_descriptor(&convDesc_, - inputDesc_, - filterDesc_, - paddingH_, - paddingW_, - strideH_, - strideW_); - - // initialize all to default algorithms - fwdAlgo_ = 0; - bwdFilterAlgo_ = 0; - bwdDataAlgo_ = 0; - fwdLimitBytes_ = 0; - bwdDataLimitBytes_ = 0; - bwdFilterLimitBytes_ = 0; - workSpaceInBytes_ = 0; - - batchNum_ = 0; - isSelectAlgo_ = false; -} - -void ConvProjection::reshapeTensorDesc(int batchSize) { - hl_tensor_reshape(inputDesc_, - batchSize, - channels_ / groups_, - imageH_, - imageW_, - channels_ * imageH_ * imageW_, - imageH_ * imageW_, - imageW_, - 1); - hl_reset_convolution_descriptor(convDesc_, - inputDesc_, - filterDesc_, - paddingH_, - paddingW_, - strideH_, - strideW_); - - // The stride between two consecutive images in ConvProjection may not be 1, - // for example, in the case of layer ConcatenateLayer2 with two - // ConvProjection, the stride is the output_size of layer ConcatenateLayer2. - // So the calculation of nStride is different from CudnnConvLayer. - // In fact, only "nStride = out_->value->getStride()" is ok. 
- size_t nStride = numFilters_ * outputH_ * outputW_; - if (out_->value->isContiguous()) { - CHECK_EQ(nStride, out_->value->getWidth()); - } else { - nStride = out_->value->getStride(); - } - - hl_tensor_reshape(outputDesc_, - batchSize, - numFilters_ / groups_, - outputH_, - outputW_, - nStride, - outputH_ * outputW_, - outputW_, - 1); -} - -void ConvProjection::reshape(int batchSize) { - size_t width = calOutputSize(); - CHECK_EQ(width, out_->value->getWidth()); - CHECK_EQ(static_cast(channels_ * imageH_ * imageW_), - in_->value->getWidth()) - << "Wrong input size for convolution" - << " channels=" << channels_ << " imageH=" << imageH_ - << " imageW=" << imageW_ << " inputSize=" << in_->value->getWidth(); - - isSelectAlgo_ = (batchSize == batchNum_); - batchNum_ = batchSize; - - if (!isSelectAlgo_) { - reshapeTensorDesc(batchSize); - hl_conv_workspace(inputDesc_, - outputDesc_, - filterDesc_, - convDesc_, - &fwdAlgo_, - &fwdLimitBytes_, - &bwdDataAlgo_, - &bwdDataLimitBytes_, - &bwdFilterAlgo_, - &bwdFilterLimitBytes_); - - size_t maxWorkSpace = 0; - maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); - maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_); - workSpaceInBytes_ = maxWorkSpace; - - VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_ - << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_; - } - - isSelectAlgo_ = true; -} - void ConvProjection::forward() { int batchSize = in_->value->getHeight(); reshape(batchSize); @@ -179,7 +34,7 @@ void ConvProjection::forward() { real *inputData = in_->value->getData() + g * inputOffset_; real *wgtData = weight_->getW()->getData() + g * weightOffset_; real *outData = out_->value->getData() + g * outputOffset_; - hl_convolution_forward(inputDesc_, + hl_convolution_forward(imageDesc_, inputData, outputDesc_, outData, @@ -205,7 +60,7 @@ void ConvProjection::backward(const UpdateCallback &callback) { if (weight_->getWGrad()) { real *inputData = in_->value->getData() + g * 
inputOffset_; real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_; - hl_convolution_backward_filter(inputDesc_, + hl_convolution_backward_filter(imageDesc_, inputData, outputDesc_, outGrad, @@ -221,7 +76,7 @@ void ConvProjection::backward(const UpdateCallback &callback) { if (NULL != preGrad) { real *inputGrad = preGrad->getData() + g * inputOffset_; real *wgtData = weight_->getW()->getData() + g * weightOffset_; - hl_convolution_backward_data(inputDesc_, + hl_convolution_backward_data(imageDesc_, inputGrad, outputDesc_, outGrad, @@ -237,26 +92,4 @@ void ConvProjection::backward(const UpdateCallback &callback) { weight_->getParameterPtr()->incUpdate(callback); } -void *ConvProjection::getSpaceBytes(size_t size) { - std::vector &convMem = *convMem_; - if (convMem.empty()) { - int numDevices = hl_get_device_count(); - convMem.resize(numDevices); - } - - int devId = hl_get_device(); - MemoryHandle **localMem = &(convMem[devId]); - if (NULL == *localMem || size > (*localMem)->getAllocSize()) { - *localMem = new GpuMemoryHandle(size); - } - return (*localMem)->getBuf(); -} - -ConvProjection::~ConvProjection() { - hl_destroy_tensor_descriptor(inputDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - hl_destroy_filter_descriptor(filterDesc_); - hl_destroy_convolution_descriptor(convDesc_); -} - } // namespace paddle diff --git a/paddle/gserver/layers/ConvProjection.h b/paddle/gserver/layers/ConvProjection.h index c32e5e1d3a..c7d9178978 100644 --- a/paddle/gserver/layers/ConvProjection.h +++ b/paddle/gserver/layers/ConvProjection.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "Projection.h" +#include "ConvBaseProjection.h" #include "paddle/math/MathUtils.h" namespace paddle { @@ -22,109 +22,20 @@ namespace paddle { /** * @brief Convolution projection do the same calculation with CudnnConvLayer. */ -class ConvProjection : public Projection { +class ConvProjection : public ConvBaseProjection { public: /** * Constructor. 
*/ ConvProjection(const ProjectionConfig& config, ParameterPtr parameter, - bool useGpu); + bool useGpu) + : ConvBaseProjection(config, parameter, useGpu) {} - ~ConvProjection(); + ~ConvProjection() {} virtual void forward(); virtual void backward(const UpdateCallback& callback); - -protected: - void getConvParams(); - void initCudnn(); - - void reshapeTensorDesc(int batchSize); - void reshape(int batchSize); - - size_t calOutputSize() { - imageH_ = in_->getFrameHeight(); - imageW_ = in_->getFrameWidth(); - if (imageH_ == 0) imageH_ = configImgH_; - if (imageW_ == 0) imageW_ = configImgW_; - outputH_ = outputSize(imageH_, - filterH_, - paddingH_, - strideH_, - /* caffeMode */ true); - outputW_ = outputSize(imageW_, - filterW_, - paddingW_, - strideW_, - /* caffeMode */ true); - - const_cast(out_)->setFrameHeight(outputH_); - const_cast(out_)->setFrameWidth(outputW_); - - inputOffset_ = (channels_ / groups_) * imageH_ * imageW_; - outputOffset_ = (numFilters_ / groups_) * outputH_ * outputW_; - return outputH_ * outputW_ * numFilters_; - } - - static void* getSpaceBytes(size_t size); - - /// imageH_ and imageW_ is calculated from the input layer. - int imageH_, imageW_; - /// configImgH_ and configImgW_ is obtained from config. - int configImgH_, configImgW_; - int outputH_, outputW_; - int channels_, numFilters_; - int paddingH_, paddingW_; - int strideH_, strideW_; - int filterH_, filterW_; - /// One group offset of input data. - int inputOffset_; - /// One group offset of output data. - int outputOffset_; - /// One group offset of weight. - int weightOffset_; - int groups_; - - /// Cudnn tensor descriptor for input. - hl_tensor_descriptor inputDesc_; - /// Cudnn tensor descriptor for output. - hl_tensor_descriptor outputDesc_; - /// Cudnn tensor descriptor for filter. - hl_filter_descriptor filterDesc_; - /// Cudnn tensor descriptor for a convolution operation. 
- hl_convolution_descriptor convDesc_; - - /// Record the algorithm for forward convolution, which is obtained by cudnn - /// api to search the best suited algorithm. - int fwdAlgo_; - /// Record the algorithm for computing convolution gradient with respect to - /// filter coefficients. - int bwdFilterAlgo_; - /// Record the algorithm for computing convolution gradient with respect to - /// the output. - int bwdDataAlgo_; - /// Amount of GPU memory needed as workspace to be able to execute a - /// forward convolution with the specified algo. - size_t fwdLimitBytes_; - /// Amount of GPU memory needed as workspace to be able to execute a - /// backwardFilter with the specified algo. - size_t bwdDataLimitBytes_; - /// Amount of GPU memory needed as workspace to be able to execute a - /// backwardData with the specified algo. - size_t bwdFilterLimitBytes_; - /// Size of total work space. - size_t workSpaceInBytes_; - - /// Whether to call cuDNN api to choose conv algorithm. - bool isSelectAlgo_; - /// batchNum is used to record batch size. If the batch size is changed, - /// the selection algorithm will be called. - int batchNum_; - bool bias_; - - std::unique_ptr weight_; - static ThreadLocalD> convMem_; }; } // namespace paddle diff --git a/paddle/gserver/layers/ConvTransProjection.cpp b/paddle/gserver/layers/ConvTransProjection.cpp new file mode 100644 index 0000000000..675528acef --- /dev/null +++ b/paddle/gserver/layers/ConvTransProjection.cpp @@ -0,0 +1,95 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ConvTransProjection.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_PROJECTION(convt, ConvTransProjection); + +void ConvTransProjection::forward() { + int batchSize = in_->value->getHeight(); + reshape(batchSize); + + void *workSpace = NULL; + if (workSpaceInBytes_ > 0) { + workSpace = getSpaceBytes(workSpaceInBytes_); + } + + for (int g = 0; g < groups_; ++g) { + REGISTER_TIMER_INFO("CudnnConvTransFwTimer", getName().c_str()); + + real *inData = in_->value->getData() + g * inputOffset_; + real *wgtData = weight_->getW()->getData() + g * weightOffset_; + real *outData = out_->value->getData() + g * outputOffset_; + hl_convolution_backward_data(imageDesc_, + outData, + outputDesc_, + inData, + filterDesc_, + wgtData, + convDesc_, + workSpace, + bwdDataLimitBytes_, + bwdDataAlgo_); + } +} + +void ConvTransProjection::backward(const UpdateCallback &callback) { + REGISTER_TIMER_INFO("CudnnConvTransBpTimer", getName().c_str()); + + void *workSpace = NULL; + if (workSpaceInBytes_ > 0) { + workSpace = getSpaceBytes(workSpaceInBytes_); + } + + for (int g = 0; g < groups_; ++g) { + real *outGrad = out_->grad->getData() + g * outputOffset_; + if (weight_->getWGrad()) { + real *inData = in_->value->getData() + g * inputOffset_; + real *weightGrad = weight_->getWGrad()->getData() + g * weightOffset_; + hl_convolution_backward_filter(imageDesc_, + outGrad, + outputDesc_, + inData, + filterDesc_, + weightGrad, + convDesc_, + workSpace, + bwdFilterLimitBytes_, + bwdFilterAlgo_); + } + + MatrixPtr preGrad = in_->grad; + if (NULL != preGrad) { + real *inGrad = preGrad->getData() + g * inputOffset_; + real *wgtData = weight_->getW()->getData() + g * weightOffset_; + hl_convolution_forward(imageDesc_, + outGrad, + outputDesc_, + inGrad, + filterDesc_, + wgtData, + convDesc_, + workSpace, + fwdLimitBytes_, + fwdAlgo_); + } + } + + 
weight_->getParameterPtr()->incUpdate(callback); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ConvTransProjection.h b/paddle/gserver/layers/ConvTransProjection.h new file mode 100644 index 0000000000..7a4f30024c --- /dev/null +++ b/paddle/gserver/layers/ConvTransProjection.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "ConvBaseProjection.h" +#include "paddle/math/MathUtils.h" + +namespace paddle { + +/** + * @brief Convolution transpose projection does the same calculation as CudnnConvTransLayer. + */ +class ConvTransProjection : public ConvBaseProjection { +public: + /** + * Constructor. + */ + ConvTransProjection(const ProjectionConfig& config, + ParameterPtr parameter, + bool useGpu) + : ConvBaseProjection(config, parameter, useGpu) {} + + ~ConvTransProjection() {} + + virtual void forward(); + virtual void backward(const UpdateCallback& callback); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/CudnnConvBaseLayer.cpp b/paddle/gserver/layers/CudnnConvBaseLayer.cpp new file mode 100644 index 0000000000..be7e32e54b --- /dev/null +++ b/paddle/gserver/layers/CudnnConvBaseLayer.cpp @@ -0,0 +1,121 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CudnnConvBaseLayer.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +bool CudnnConvBaseLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { + if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; + CHECK(useGpu_) << "CudnnConvLayer only support gpu"; + + CHECK_EQ(inputLayers_.size(), parameters_.size()); + projections_.reserve(inputLayers_.size()); + projConf_.reserve(inputLayers_.size()); + + numFilters_ = config_.num_filters(); + CHECK(config_.shared_biases()); + for (size_t i = 0; i < inputLayers_.size(); i++) { + ProjectionConfig *conf = new ProjectionConfig(); + if (isDeconv_) { + conf->set_type("convt"); + } else { + conf->set_type("conv"); + } + conf->set_num_filters(numFilters_); + ConvConfig *convConf = conf->mutable_conv_conf(); + *convConf = *(config_.mutable_inputs(i)->mutable_conv_conf()); + conf->set_input_size(getPrev(i)->getSize()); + conf->set_output_size(getSize()); + projConf_.emplace_back(conf); + projections_.emplace_back( + Projection::create(*projConf_[i], parameters_[i], useGpu_)); + } + + if (biases_.get() && sharedBiases_) { + hl_create_tensor_descriptor(&biasDesc_); + hl_create_tensor_descriptor(&outputDesc_); + hl_tensor_reshape(biasDesc_, 1, numFilters_, 1, 1); + } + + return true; +} + +void CudnnConvBaseLayer::forward(PassType passType) { + Layer::forward(passType); + + int batchSize = getInput(0).getBatchSize(); + resetOutput(batchSize, calOutputSize()); + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + projections_[i]->forward(&getInput(i), 
&getOutput(), passType); + } + + if (biases_) { + REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str()); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int outH, outW; + if (isDeconv_) { + outH = imgSizeH_[0]; + outW = imgSizeW_[0]; + } else { + outH = outputH_[0]; + outW = outputW_[0]; + } + + hl_tensor_reshape(outputDesc_, + batchSize, + numFilters_, + outH, + outW, + numFilters_ * outH * outW, + outH * outW, + outW, + 1); + real *outData = getOutputValue()->getData(); + real *biasData = biases_->getW()->getData(); + hl_convolution_forward_add_bias(biasDesc_, biasData, outputDesc_, outData); + } + + forwardActivation(); +} + +void CudnnConvBaseLayer::backward(const UpdateCallback &callback) { + backwardActivation(); + + if (biases_ && biases_->getWGrad()) { + REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str()); + real *biasGrad = biases_->getWGrad()->getData(); + real *outGrad = getOutputGrad()->getData(); + hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad); + + biases_->getParameterPtr()->incUpdate(callback); + } + + for (size_t i = 0; i != inputLayers_.size(); ++i) { + projections_[i]->backward(callback); + } +} + +CudnnConvBaseLayer::~CudnnConvBaseLayer() { + if (biases_) { + hl_destroy_tensor_descriptor(biasDesc_); + hl_destroy_tensor_descriptor(outputDesc_); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/CudnnConvBaseLayer.h b/paddle/gserver/layers/CudnnConvBaseLayer.h new file mode 100644 index 0000000000..ab46abea65 --- /dev/null +++ b/paddle/gserver/layers/CudnnConvBaseLayer.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "ConvBaseLayer.h" +#include "Projection.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief A 2-dimension conv layer implemented by cuDNN. It only + * supports GPU mode. We automatic select CudnnConvLayer for GPU + * mode and ExpandConvLayer for CPU mode if you set type of "conv". + * User also can specfiy type of "exconv" or "cudnn_conv" for + * particular type. + * + * The config file api is img_conv_layer. + */ +class CudnnConvBaseLayer : public ConvBaseLayer { +protected: + std::vector> projConf_; + std::vector> projections_; + + hl_tensor_descriptor biasDesc_; + hl_tensor_descriptor outputDesc_; + +public: + explicit CudnnConvBaseLayer(const LayerConfig& config) + : ConvBaseLayer(config) {} + + ~CudnnConvBaseLayer(); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) + override; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/CudnnConvLayer.cpp b/paddle/gserver/layers/CudnnConvLayer.cpp index 978c2c1479..cce82c1628 100644 --- a/paddle/gserver/layers/CudnnConvLayer.cpp +++ b/paddle/gserver/layers/CudnnConvLayer.cpp @@ -20,97 +20,4 @@ namespace paddle { REGISTER_LAYER(cudnn_conv, CudnnConvLayer); -bool CudnnConvLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; - CHECK(useGpu_) << "CudnnConvLayer only support gpu"; - - CHECK_EQ(inputLayers_.size(), parameters_.size()); - 
projections_.reserve(inputLayers_.size()); - projConf_.reserve(inputLayers_.size()); - - numFilters_ = config_.num_filters(); - CHECK(config_.shared_biases()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - ProjectionConfig *conf = new ProjectionConfig(); - conf->set_type("conv"); - conf->set_num_filters(numFilters_); - ConvConfig *convConf = conf->mutable_conv_conf(); - *convConf = *(config_.mutable_inputs(i)->mutable_conv_conf()); - conf->set_input_size(getPrev(i)->getSize()); - conf->set_output_size(getSize()); - projConf_.emplace_back(conf); - projections_.emplace_back( - Projection::create(*projConf_[i], parameters_[i], useGpu_)); - } - - if (biases_.get() && sharedBiases_) { - hl_create_tensor_descriptor(&biasDesc_); - hl_create_tensor_descriptor(&outputDesc_); - hl_tensor_reshape(biasDesc_, 1, numFilters_ / groups_[0], 1, 1); - biasOffset_ = numFilters_ / groups_[0]; - } - - return true; -} - -void CudnnConvLayer::forward(PassType passType) { - Layer::forward(passType); - - int batchSize = getInput(0).getBatchSize(); - resetOutput(batchSize, calOutputSize()); - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - projections_[i]->forward(&getInput(i), &getOutput(), passType); - } - - if (biases_) { - REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str()); - int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - hl_tensor_reshape(outputDesc_, - batchSize, - numFilters_ / groups_[0], - outputH_[0], - outputW_[0], - numFilters_ * outputH_[0] * outputW_[0], - outputH_[0] * outputW_[0], - outputW_[0], - 1); - outputOffset_ = getOutputValue()->getWidth() / groups_[0]; - for (int g = 0; g < groups_[0]; ++g) { - real *biasData = biases_->getW()->getData() + biasOffset_ * g; - real *outData = getOutputValue()->getData() + outputOffset_ * g; - hl_convolution_forward_add_bias( - biasDesc_, biasData, outputDesc_, outData); - } - } - - forwardActivation(); -} - -void CudnnConvLayer::backward(const UpdateCallback &callback) { - 
backwardActivation(); - - if (biases_ && biases_->getWGrad()) { - REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str()); - for (int g = 0; g < groups_[0]; ++g) { - real *biasGrad = biases_->getWGrad()->getData() + biasOffset_ * g; - real *outGrad = getOutputGrad()->getData() + outputOffset_ * g; - hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad); - } - biases_->getParameterPtr()->incUpdate(callback); - } - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - projections_[i]->backward(callback); - } -} - -CudnnConvLayer::~CudnnConvLayer() { - if (biases_) { - hl_destroy_tensor_descriptor(biasDesc_); - hl_destroy_tensor_descriptor(outputDesc_); - } -} - } // namespace paddle diff --git a/paddle/gserver/layers/CudnnConvLayer.h b/paddle/gserver/layers/CudnnConvLayer.h index 919b1efc4e..b43ea7bffa 100644 --- a/paddle/gserver/layers/CudnnConvLayer.h +++ b/paddle/gserver/layers/CudnnConvLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include -#include "ConvBaseLayer.h" +#include "CudnnConvBaseLayer.h" #include "Projection.h" #include "paddle/math/Matrix.h" @@ -30,27 +30,12 @@ namespace paddle { * * The config file api is img_conv_layer. 
*/ -class CudnnConvLayer : public ConvBaseLayer { -protected: - std::vector> projConf_; - std::vector> projections_; - - hl_tensor_descriptor biasDesc_; - hl_tensor_descriptor outputDesc_; - int biasOffset_; - int outputOffset_; - +class CudnnConvLayer : public CudnnConvBaseLayer { public: - explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} - - ~CudnnConvLayer(); + explicit CudnnConvLayer(const LayerConfig& config) + : CudnnConvBaseLayer(config) {} - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - void addBiases(); - void bpropBiases(); + ~CudnnConvLayer() {} }; } // namespace paddle diff --git a/paddle/gserver/layers/CudnnConvTransLayer.cpp b/paddle/gserver/layers/CudnnConvTransLayer.cpp new file mode 100644 index 0000000000..9cecb871e1 --- /dev/null +++ b/paddle/gserver/layers/CudnnConvTransLayer.cpp @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "CudnnConvTransLayer.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(cudnn_convt, CudnnConvTransLayer); + +} // namespace paddle diff --git a/paddle/gserver/layers/CudnnConvTransLayer.h b/paddle/gserver/layers/CudnnConvTransLayer.h new file mode 100644 index 0000000000..c69dd9a344 --- /dev/null +++ b/paddle/gserver/layers/CudnnConvTransLayer.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "CudnnConvBaseLayer.h" +#include "Projection.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * @brief A 2-dimension convolution transpose (deconv) layer implemented + * by cuDNN. It only supports GPU mode. We automatically select + * CudnnConvTransLayer for GPU mode and ExpandConvTransLayer for CPU + * mode if you set type of "convt". User also can specify type of + * "exconvt" or "cudnn_convt" for particular type. + * + * The config file api is img_conv_layer with trans=True. 
+ */ +class CudnnConvTransLayer : public CudnnConvBaseLayer { +public: + explicit CudnnConvTransLayer(const LayerConfig& config) + : CudnnConvBaseLayer(config) {} + + ~CudnnConvTransLayer() {} +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_ConvUnify.cpp b/paddle/gserver/tests/test_ConvUnify.cpp index 207fc0566f..1e647b4b7a 100644 --- a/paddle/gserver/tests/test_ConvUnify.cpp +++ b/paddle/gserver/tests/test_ConvUnify.cpp @@ -34,8 +34,7 @@ DECLARE_double(checkgrad_eps); DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(prev_batch_state); -// Do one forward pass of convTrans layer and check to see if its output -// matches the given result +// Do one forward pass of ConvLayer using either exconv or cudnn_conv MatrixPtr doOneConvTest(size_t imgSize, size_t output_x, size_t stride, diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 63d3840e23..692f1d3885 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -166,15 +166,19 @@ TEST(Projection, scaling) { } } -void testProjectionConv(size_t groups) { +void testProjectionConv(size_t groups, bool isDeconv) { const int NUM_FILTERS = 18; const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 3; + const int FILTER_SIZE_Y = 4; const int CHANNELS = 3; const int IMAGE_SIZE = 16; ProjectionConfig conf; - conf.set_type("conv"); + if (isDeconv) { + conf.set_type("convt"); + } else { + conf.set_type("conv"); + } conf.set_num_filters(NUM_FILTERS); ConvConfig* conv = conf.mutable_conv_conf(); @@ -186,7 +190,11 @@ void testProjectionConv(size_t groups) { conv->set_stride(2); conv->set_stride_y(2); conv->set_groups(groups); - conv->set_filter_channels(conv->channels() / conv->groups()); + if (isDeconv) { + conv->set_filter_channels(NUM_FILTERS / conv->groups()); + } else { + conv->set_filter_channels(conv->channels() / conv->groups()); + } conv->set_img_size(IMAGE_SIZE); int output_x = 
outputSize(conv->img_size(), conv->filter_size(), @@ -199,8 +207,14 @@ void testProjectionConv(size_t groups) { conv->stride_y(), /* caffeMode */ true); conv->set_output_x(output_x); - conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); - conf.set_output_size(output_x * output_y * NUM_FILTERS); + conv->set_output_y(output_y); + if (isDeconv) { + conf.set_input_size(output_x * output_y * CHANNELS); + conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS); + } else { + conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); + conf.set_output_size(output_x * output_y * NUM_FILTERS); + } testProjectionGrad(conf, INPUT_DATA, @@ -215,8 +229,12 @@ void testProjectionConv(size_t groups) { #ifndef PADDLE_ONLY_CPU TEST(Projection, conv) { - testProjectionConv(1); - testProjectionConv(3); + /// test ConvProjection + testProjectionConv(1, false); + testProjectionConv(3, false); + /// test ConvTransProjection + testProjectionConv(1, true); + testProjectionConv(3, true); } #endif @@ -385,11 +403,11 @@ void testConvTransLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288}); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); LayerInputConfig* input = config.layerConfig.add_inputs(); ConvConfig* conv = input->mutable_conv_conf(); conv->set_filter_size(2); - conv->set_filter_size_y(3); + conv->set_filter_size_y(4); conv->set_channels(16); conv->set_padding(0); conv->set_padding_y(1); @@ -416,6 +434,9 @@ TEST(Layer, convTransLayer) { for (auto useGpu : {false, true}) { testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); } +#ifndef PADDLE_ONLY_CPU + testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); +#endif } TEST(Layer, blockExpandLayer) { -- GitLab