/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "ConvBaseLayer.h"
#include "paddle/math/Matrix.h"
#include <vector>

namespace paddle {

/**
 * @brief A subclass of ConvBaseLayer implemented with cuDNN. It only
 *        supports GPU mode. CudnnConvLayer is selected automatically for
 *        GPU mode and ExpandConvLayer for CPU mode when the layer type is
 *        set to "conv". Users can also explicitly request a particular
 *        implementation by setting the type to "exconv" or "cudnn_conv".
 *
 * The config file api is img_conv_layer.
 */
class CudnnConvLayer : public ConvBaseLayer {
private:
  /// Resize the cuDNN workspace.
  void allocConvWorkSpace(size_t maxWorkSpace);

protected:
  int imageH_, imageW_, outputH_, outputW_;
  /// cuDNN tensor descriptor for bias.
  hl_tensor_descriptor biasDesc_;
  /// cuDNN tensor descriptors for input.
  std::vector<hl_tensor_descriptor> inputDesc_;
  /// cuDNN tensor descriptors for output.
  std::vector<hl_tensor_descriptor> outputDesc_;
  /// cuDNN descriptors for filter.
  std::vector<hl_filter_descriptor> filterDesc_;
  /// cuDNN descriptors for the convolution operation.
  std::vector<hl_convolution_descriptor> convDesc_;
  /// Offset of one sample of input data.
  IntV inputOffset_;
  /// Offset of one sample of output data.
  IntV outputOffset_;
  /// Offset of one group of weights.
  IntV weightOffset_;
  /// Offset of one group of biases.
  int biasOffset_;

  /// Algorithm for forward convolution, obtained from the cuDNN API that
  /// searches for the best-suited algorithm.
  std::vector<int> fwdAlgo_;
  /// Algorithm for computing the convolution gradient with respect to the
  /// filter coefficients.
  std::vector<int> bwdFilterAlgo_;
  /// Algorithm for computing the convolution gradient with respect to the
  /// input data.
  std::vector<int> bwdDataAlgo_;
  /// Amount of GPU memory needed as workspace to execute a forward
  /// convolution with the specified algorithm.
  std::vector<size_t> fwdLimitBytes_;
  /// Amount of GPU memory needed as workspace to execute a backward filter
  /// pass with the specified algorithm.
  std::vector<size_t> bwdFilterLimitBytes_;
  /// Amount of GPU memory needed as workspace to execute a backward data
  /// pass with the specified algorithm.
  std::vector<size_t> bwdDataLimitBytes_;

  /// Device workspace address for each group.
  std::vector<void*> workSpace_;
  /// Max number of groups.
  int maxGroups_;
  /// Total workspace address in device memory for all groups.
  void* workSpaceData_;
  /// Size of the total workspace.
  size_t workSpaceInBytes_;

  /// Whether the convolution algorithms have been selected.
  bool isSelectAlgo_;

public:
  explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}

  ~CudnnConvLayer();

  /**
   * Initialization. Initialize member variables and create tensor
   * descriptors.
   */
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /**
   * Reshape is done in each forward pass. It reshapes the tensor descriptors
   * inputDesc_, outputDesc_ and convDesc_, and searches for the fastest
   * algorithm, or the fastest algorithm within a given memory limit.
   */
  void reshape(int batchSize);

  void forward(PassType passType);
  void backward(const UpdateCallback& callback);
  void addBiases();
  void bpropBiases();
};

}  // namespace paddle
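
// The algorithm search performed in CudnnConvLayer::reshape corresponds to
// the workspace-limited selection that cuDNN exposes. A minimal sketch with
// the raw cuDNN API is shown below for reference; the layer itself goes
// through the hl_* wrappers, so the handle and descriptor names here
// (handle, inDesc, filterDesc, convDesc, outDesc, workSpaceLimitInBytes)
// are illustrative assumptions, not members of this class.
//
//   cudnnConvolutionFwdAlgo_t algo;
//   // Pick the fastest forward algorithm whose workspace fits the limit.
//   cudnnGetConvolutionForwardAlgorithm(
//       handle, inDesc, filterDesc, convDesc, outDesc,
//       CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
//       workSpaceLimitInBytes, &algo);
//   // Query how many bytes of workspace that algorithm actually needs.
//   size_t fwdBytes = 0;
//   cudnnGetConvolutionForwardWorkspaceSize(
//       handle, inDesc, filterDesc, convDesc, outDesc, algo, &fwdBytes);
//
// The backward-filter and backward-data algorithms are selected analogously
// (cudnnGetConvolutionBackwardFilterAlgorithm and
// cudnnGetConvolutionBackwardDataAlgorithm), which is why the class keeps
// per-direction algorithm and limit-bytes vectors.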