Commit 6b7f6474 authored by wangyang59

deconv projection/operator implementation

Parent b8afb140
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvBaseOperator takes two inputs to perform the convolution.
 * The first input is the image, and the second input is the convolution kernel.
 * The heights of the data for the two inputs are the same. Each sample of the
 * first input is convolved with the corresponding sample of the second input
 * independently.
 *
 * The config file api is conv_operator.
 */
ConvBaseOperator::ConvBaseOperator(const OperatorConfig &config, bool useGpu)
: Operator(config, useGpu) {
CHECK(useGpu);
CHECK_EQ(config_.input_indices_size(), 2L);
caffeMode_ = true;
getConvParams();
computeConvSizes();
// initialize all to default algorithms
fwdAlgo_ = 0;
bwdFilterAlgo_ = 0;
bwdDataAlgo_ = 0;
fwdLimitBytes_ = 0;
bwdDataLimitBytes_ = 0;
bwdFilterLimitBytes_ = 0;
workSpaceInBytes_ = 0;
workSpace_ = nullptr;
isSelectAlgo_ = false;
}
void ConvBaseOperator::allocConvWorkSpace(size_t maxWorkSpace) {
if (maxWorkSpace > workSpaceInBytes_) {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
}
// total amount of storage needed
workSpace_ = hl_malloc_device(maxWorkSpace);
workSpaceInBytes_ = maxWorkSpace;
}
}
void ConvBaseOperator::reshape(int batchSize) {
if (isDeconv_) {
outputH_ = ins_[0]->getFrameHeight();
outputW_ = ins_[0]->getFrameWidth();
if (outputH_ == 0) outputH_ = outputY_;
if (outputW_ == 0) outputW_ = outputX_;
imageH_ =
imageSize(outputH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
imageW_ = imageSize(outputW_, filterSize_, padding_, stride_, caffeMode_);
/// Check that the imageSizes are consistent with config
CHECK_EQ(imageH_, imgSizeY_);
CHECK_EQ(imageW_, imgSize_);
out_->setFrameHeight(imageH_);
out_->setFrameWidth(imageW_);
} else {
imageH_ = ins_[0]->getFrameHeight();
imageW_ = ins_[0]->getFrameWidth();
if (imageH_ == 0) imageH_ = imgSizeY_;
if (imageW_ == 0) imageW_ = imgSize_;
outputH_ =
outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
/// Check that the outputSizes are consistent with config
CHECK_EQ(outputH_, outputY_);
CHECK_EQ(outputW_, outputX_);
out_->setFrameHeight(outputH_);
out_->setFrameWidth(outputW_);
}
reshapeImageDescriptors();
if (!isSelectAlgo_) {
hl_conv_workspace(imageDesc_,
outputDesc_,
filterDesc_,
convDesc_,
&fwdAlgo_,
&fwdLimitBytes_,
&bwdDataAlgo_,
&bwdDataLimitBytes_,
&bwdFilterAlgo_,
&bwdFilterLimitBytes_);
size_t maxWorkSpace = 0;
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
allocConvWorkSpace(maxWorkSpace);
}
isSelectAlgo_ = true;
}
void ConvBaseOperator::computeConvSizes() {
hl_create_filter_descriptor(
&filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_);
hl_create_tensor_descriptor(&imageDesc_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_,
imageDesc_,
filterDesc_,
paddingY_,
padding_,
strideY_,
stride_);
}
void ConvBaseOperator::reshapeImageDescriptors() {
hl_tensor_reshape(imageDesc_,
1,
channels_,
imageH_,
imageW_,
channels_ * imageH_ * imageW_,
imageH_ * imageW_,
imageW_,
1);
hl_tensor_reshape(outputDesc_,
1,
numFilters_,
outputH_,
outputW_,
numFilters_ * outputH_ * outputW_,
outputH_ * outputW_,
outputW_,
1);
hl_reset_convolution_descriptor(convDesc_,
imageDesc_,
filterDesc_,
paddingY_,
padding_,
strideY_,
stride_);
if (isDeconv_) {
inputOffset_ = numFilters_ * outputH_ * outputW_;
outputOffset_ = channels_ * imageH_ * imageW_;
} else {
inputOffset_ = channels_ * imageH_ * imageW_;
outputOffset_ = numFilters_ * outputH_ * outputW_;
}
weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSizeY_;
}
void ConvBaseOperator::getConvParams() {
configNumFilters_ = config_.num_filters();
const ConvConfig &conf = config_.conv_conf();
padding_ = conf.padding();
stride_ = conf.stride();
filterSize_ = conf.filter_size();
paddingY_ = conf.padding_y();
strideY_ = conf.stride_y();
filterSizeY_ = conf.filter_size_y();
filterPixels_ = filterSize_ * filterSizeY_;
configChannels_ = conf.channels();
imgSize_ = conf.img_size();
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
imgPixels_ = imgSize_ * imgSizeY_;
CHECK_EQ(conf.groups(), 1U);
filterChannels_ = conf.filter_channels();
outputX_ = conf.output_x();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
outputs_ = outputX_ * outputX_;
isDeconv_ = (config_.type() == "conv") ? false : true;
if (isDeconv_) {
channels_ = configNumFilters_;
numFilters_ = configChannels_;
} else {
channels_ = configChannels_;
numFilters_ = configNumFilters_;
}
}
} // namespace paddle
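The reshape() above leans on outputSize() and imageSize() from paddle/math/MathUtils.h being inverses of each other in caffe mode: conv maps image to output, deconv maps output back to image, and the CHECK_EQs compare the result against the config. A minimal standalone sketch of that arithmetic follows; the formulas are the standard caffe-mode ones assumed here, not a copy of the MathUtils.h helpers.

#include <cassert>

// Caffe-mode size arithmetic assumed by this sketch; the real helpers are
// outputSize()/imageSize() in paddle/math/MathUtils.h.
int convOutputSize(int imageSize, int filterSize, int padding, int stride) {
  return (imageSize - filterSize + 2 * padding) / stride + 1;
}

int deconvImageSize(int outputSize, int filterSize, int padding, int stride) {
  return (outputSize - 1) * stride + filterSize - 2 * padding;
}

int main() {
  // Same shapes as TEST1 in test_ConvUnify: 4x4 image, 3x3 filter, stride 1, pad 0.
  int imageH = 4, filter = 3, padding = 0, stride = 1;
  int outputH = convOutputSize(imageH, filter, padding, stride);  // 2
  assert(outputH == 2);
  // Deconv goes the other way: a 2x2 input maps back to a 4x4 image, which is
  // what the CHECK_EQ(imageH_, imgSizeY_) in reshape() verifies against the config.
  assert(deconvImageSize(outputH, filter, padding, stride) == imageH);
  return 0;
}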
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Operator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvBaseOperator takes two inputs to perform the convolution.
 * The first input is the image, and the second input is the convolution kernel.
 * The heights of the data for the two inputs are the same. Each sample of the
 * first input is convolved with the corresponding sample of the second input
 * independently.
 *
 * The config file api is conv_operator.
 */
class ConvBaseOperator : public Operator {
public:
ConvBaseOperator(const OperatorConfig &config, bool useGpu);
/**
 * Free the device workspace and destroy the cuDNN descriptors.
 */
virtual ~ConvBaseOperator() {
if (workSpaceInBytes_ != 0) {
hl_free_mem_device(workSpace_);
workSpaceInBytes_ = 0;
}
hl_destroy_tensor_descriptor(imageDesc_);
hl_destroy_tensor_descriptor(outputDesc_);
hl_destroy_filter_descriptor(filterDesc_);
hl_destroy_convolution_descriptor(convDesc_);
}
virtual void forward();
virtual void backward();
protected:
/**
* Get convolution parameters from layer config and
* initialize member variables.
*/
void getConvParams();
/**
 * Allocate GPU memory for the cuDNN convolution algorithms.
 */
void allocConvWorkSpace(size_t maxWorkSpace);
/**
 * Create the cuDNN tensor, filter, and convolution descriptors.
 */
void computeConvSizes();
/**
 * Reshape the cuDNN tensor descriptors.
 */
void reshapeImageDescriptors();
/**
 * Reshape the descriptors and select the cuDNN algorithms for the
 * current input size.
 */
void reshape(int batchSize);
/**
 * Check that the filter size equals the size calculated from the
 * layer-config parameters.
 */
void checkFilterSize(const MatrixPtr &filter) {
CHECK_EQ(static_cast<int>(filter->getWidth()),
filterSize_ * filterSizeY_ * channels_ * numFilters_);
}
/// Most of the member variables are the same as in CudnnConvLayer,
/// so they are not documented again here.
bool isDeconv_;
int imageH_, imageW_, outputH_, outputW_;
hl_tensor_descriptor imageDesc_;
hl_tensor_descriptor outputDesc_;
hl_filter_descriptor filterDesc_;
hl_convolution_descriptor convDesc_;
bool caffeMode_;
int inputOffset_, outputOffset_, weightOffset_;
int numFilters_, channels_;
/// from parsing config
int configNumFilters_, configChannels_;
int padding_, stride_, filterSize_, imgSize_, imgSizeY_;
int paddingY_, strideY_, filterSizeY_;
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;
/// The following member variables are the same as in CudnnConvLayer,
/// so they are not documented again here.
int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
size_t workSpaceInBytes_;
void *workSpace_;
bool isSelectAlgo_;
};
} // namespace paddle
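Because every sample in the batch carries its own filter (the second input), the forward and backward passes walk three flat buffers with per-sample strides: inputOffset_, outputOffset_ and weightOffset_. In deconv mode the input and output strides swap roles, as set up in reshapeImageDescriptors(). A schematic sketch of that indexing, with illustrative names only and no cuDNN calls:

#include <cstddef>

// Illustrative only: the per-sample strides as computed in reshapeImageDescriptors().
struct SampleOffsets {
  size_t input;   // conv: channels * imageH * imageW;    deconv: numFilters * outputH * outputW
  size_t output;  // conv: numFilters * outputH * outputW; deconv: channels * imageH * imageW
  size_t weight;  // numFilters * channels * filterSizeY * filterSize in both modes
};

// Shape of the batch loop in ConvOperator::forward() / ConvTransOperator::forward():
// each sample's data, filter, and output are located by a fixed stride from the
// start of the corresponding flat buffer.
void forEachSample(const float *in, const float *weights, float *out,
                   size_t batchSize, const SampleOffsets &o) {
  for (size_t batchId = 0; batchId < batchSize; ++batchId) {
    const float *inputData = in + o.input * batchId;
    const float *wgtData = weights + o.weight * batchId;
    float *outData = out + o.output * batchId;
    (void)inputData; (void)wgtData; (void)outData;  // handed off to the cuDNN kernel
  }
}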
@@ -93,45 +93,48 @@ void ConvBaseProjection::initCudnn() {
 }
 
 void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
+  // The stride between two consecutive samples in the output of ConvProjection
+  // may not be numFilters_ * outputH_ * outputW_ (conv) or
+  // channels_ * imageH_ * imageW_ (deconv)
+  // for example, in the case of layer ConcatenateLayer2 with two
+  // ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
+  // So the calculation of nStride is different from CudnnConvLayer.
+  size_t nStrideImage, nStrideOutput;
+  if (isDeconv_) {
+    nStrideImage = out_->value->getStride();
+    nStrideOutput = numFilters_ * outputH_ * outputW_;
+  } else {
+    nStrideImage = channels_ * imageH_ * imageW_;
+    nStrideOutput = out_->value->getStride();
+  }
+
   hl_tensor_reshape(imageDesc_,
                     batchSize,
                     channels_ / groups_,
                     imageH_,
                     imageW_,
-                    channels_ * imageH_ * imageW_,
+                    nStrideImage,
                     imageH_ * imageW_,
                     imageW_,
                     1);
-  hl_reset_convolution_descriptor(convDesc_,
-                                  imageDesc_,
-                                  filterDesc_,
-                                  paddingH_,
-                                  paddingW_,
-                                  strideH_,
-                                  strideW_);
-
-  // The stride between two consecutive images in ConvProjection may not be 1,
-  // for example, in the case of layer ConcatenateLayer2 with two
-  // ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
-  // So the calculation of nStride is different from CudnnConvLayer.
-  // In fact, only "nStride = out_->value->getStride()" is ok.
-  // size_t nStride = numFilters_ * outputH_ * outputW_;
-  // if (out_->value->isContiguous()) {
-  //   CHECK_EQ(nStride, out_->value->getWidth());
-  // } else {
-  //   nStride = out_->value->getStride();
-  // }
-  size_t nStride = out_->value->getStride();
 
   hl_tensor_reshape(outputDesc_,
                     batchSize,
                     numFilters_ / groups_,
                     outputH_,
                     outputW_,
-                    nStride,
+                    nStrideOutput,
                     outputH_ * outputW_,
                     outputW_,
                     1);
+
+  hl_reset_convolution_descriptor(convDesc_,
+                                  imageDesc_,
+                                  filterDesc_,
+                                  paddingH_,
+                                  paddingW_,
+                                  strideH_,
+                                  strideW_);
 }
 
 void ConvBaseProjection::reshape(int batchSize) {
...
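The comment block added in this hunk captures the key subtlety: when several projections feed one ConcatenateLayer2, they all write into a single output matrix, so the distance between two consecutive samples of any one projection is the concatenated width, not that projection's own sample size. A small sketch of the offset arithmetic this implies, using illustrative widths that are not taken from the commit:

#include <cstddef>
#include <cstdio>

int main() {
  // Two projections concatenated into one shared output matrix.
  const size_t proj1Width = 8;   // e.g. numFilters_ * outputH_ * outputW_ of projection 1
  const size_t proj2Width = 12;  // the same product for projection 2
  const size_t concatWidth = proj1Width + proj2Width;  // width of the shared matrix

  // Sample b of projection 2 starts at row b, column proj1Width of the shared matrix.
  for (size_t b = 0; b < 3; ++b) {
    const size_t offset = b * concatWidth + proj1Width;
    std::printf("sample %zu of projection 2 starts at element %zu\n", b, offset);
  }
  // The batch stride cuDNN must be told is therefore concatWidth, i.e.
  // out_->value->getStride(), not proj2Width -- which is exactly what
  // nStrideImage / nStrideOutput carry in reshapeTensorDesc() above.
  return 0;
}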
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "Operator.h"
+#include "ConvOperator.h"
 #include "paddle/math/MathUtils.h"
 #include "paddle/math/Matrix.h"
 
@@ -27,227 +27,8 @@ namespace paddle {
  * The config file api is conv_operator.
  */
 
-class ConvOperator : public Operator {
-public:
-  ConvOperator(const OperatorConfig &config, bool useGpu);
-
-  /**
-   * Free workspace in device and destroy cudnn tensor descriptor.
-   */
-  virtual ~ConvOperator() {
-    if (workSpaceInBytes_ != 0) {
-      hl_free_mem_device(workSpace_);
-      workSpaceInBytes_ = 0;
-    }
-
-    hl_destroy_tensor_descriptor(inputDesc_);
-    hl_destroy_tensor_descriptor(outputDesc_);
-    hl_destroy_filter_descriptor(filterDesc_);
-    hl_destroy_convolution_descriptor(convDesc_);
-  }
-
-  virtual void forward();
-  virtual void backward();
-
-private:
-  /**
-   * Get convolution parameters from layer config and
-   * initialize member variables.
-   */
-  void getConvParams();
-
-  /**
-   * Allocate Gpu Memory for cudnn convolution algorithms.
-   */
-  void allocConvWorkSpace(size_t maxWorkSpace);
-
-  /**
-   * Create cudnn tensor descriptor for convolution operation.
-   */
-  void computeConvSizes();
-
-  /**
-   * Reshape cudnn tensor descriptor.
-   */
-  void reshapeImageDescriptors();
-
-  /**
-   * Reshape cudnn tensor descriptor.
-   */
-  void reshape(int batchSize);
-
-  /**
-   * Check filter size is equal to the size calculated by parameters from
-   * layer config.
-   */
-  void checkFilterSize(const MatrixPtr &filter) {
-    CHECK_EQ(static_cast<int>(filter->getWidth()),
-             filterSize_ * filterSizeY_ * channels_ * numFilters_);
-  }
-
-  /// Most of member variables are same with CudnnConvLayer.
-  /// There is no explanation here.
-  int imageH_, imageW_, outputH_, outputW_;
-  hl_tensor_descriptor inputDesc_;
-  hl_tensor_descriptor outputDesc_;
-  hl_filter_descriptor filterDesc_;
-  hl_convolution_descriptor convDesc_;
-  bool caffeMode_;
-  int inputOffset_, outputOffset_, weightOffset_;
-  int numFilters_;
-  int padding_, stride_, filterSize_, channels_, imgSize_, imgSizeY_;
-  int paddingY_, strideY_, filterSizeY_;
-  int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;
-
-  /// Following member variables are same with CudnnConvLayer.
-  /// There is no explanation here.
-  int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
-  size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
-  size_t workSpaceInBytes_;
-  void *workSpace_;
-  bool isSelectAlgo_;
-};
-
 REGISTER_OPERATOR(conv, ConvOperator);
 
-ConvOperator::ConvOperator(const OperatorConfig &config, bool useGpu)
-    : Operator(config, useGpu) {
-  CHECK(useGpu);
-  CHECK_EQ(config_.input_indices_size(), 2L);
-
-  caffeMode_ = true;
-  getConvParams();
-  computeConvSizes();
-
-  // initialize all to default algorithms
-  fwdAlgo_ = 0;
-  bwdFilterAlgo_ = 0;
-  bwdDataAlgo_ = 0;
-  fwdLimitBytes_ = 0;
-  bwdDataLimitBytes_ = 0;
-  bwdFilterLimitBytes_ = 0;
-  workSpaceInBytes_ = 0;
-  workSpace_ = nullptr;
-
-  isSelectAlgo_ = false;
-}
-
-void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
-  if (maxWorkSpace > workSpaceInBytes_) {
-    if (workSpaceInBytes_ != 0) {
-      hl_free_mem_device(workSpace_);
-    }
-
-    // total amount of storage needed
-    workSpace_ = hl_malloc_device(maxWorkSpace);
-    workSpaceInBytes_ = maxWorkSpace;
-  }
-}
-
-void ConvOperator::reshape(int batchSize) {
-  imageH_ = ins_[0]->getFrameHeight();
-  imageW_ = ins_[0]->getFrameWidth();
-  if (imageH_ == 0) imageH_ = imgSizeY_;
-  if (imageW_ == 0) imageW_ = imgSize_;
-  outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
-  outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
-
-  out_->setFrameHeight(outputH_);
-  out_->setFrameWidth(outputW_);
-
-  reshapeImageDescriptors();
-
-  if (!isSelectAlgo_) {
-    hl_conv_workspace(inputDesc_,
-                      outputDesc_,
-                      filterDesc_,
-                      convDesc_,
-                      &fwdAlgo_,
-                      &fwdLimitBytes_,
-                      &bwdDataAlgo_,
-                      &bwdDataLimitBytes_,
-                      &bwdFilterAlgo_,
-                      &bwdFilterLimitBytes_);
-
-    size_t maxWorkSpace = 0;
-    maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
-    maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
-
-    allocConvWorkSpace(maxWorkSpace);
-  }
-
-  isSelectAlgo_ = true;
-}
-
-void ConvOperator::computeConvSizes() {
-  hl_create_filter_descriptor(
-      &filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_);
-  hl_create_tensor_descriptor(&inputDesc_);
-  int outputX =
-      outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
-  int outputY =
-      outputSize(imgSizeY_, filterSizeY_, paddingY_, strideY_, caffeMode_);
-  CHECK_EQ(outputX, outputX_);
-  CHECK_EQ(outputY, outputY_);
-  hl_create_tensor_descriptor(&outputDesc_);
-  hl_create_convolution_descriptor(&convDesc_,
-                                   inputDesc_,
-                                   filterDesc_,
-                                   paddingY_,
-                                   padding_,
-                                   strideY_,
-                                   stride_);
-}
-
-void ConvOperator::reshapeImageDescriptors() {
-  hl_tensor_reshape(inputDesc_,
-                    1,
-                    channels_,
-                    imageH_,
-                    imageW_,
-                    channels_ * imageH_ * imageW_,
-                    imageH_ * imageW_,
-                    imageW_,
-                    1);
-  hl_tensor_reshape(outputDesc_,
-                    1,
-                    numFilters_,
-                    outputH_,
-                    outputW_,
-                    numFilters_ * outputH_ * outputW_,
-                    outputH_ * outputW_,
-                    outputW_,
-                    1);
-  hl_reset_convolution_descriptor(convDesc_,
-                                  inputDesc_,
-                                  filterDesc_,
-                                  paddingY_,
-                                  padding_,
-                                  strideY_,
-                                  stride_);
-  inputOffset_ = channels_ * imageH_ * imageW_;
-  outputOffset_ = numFilters_ * outputH_ * outputW_;
-  weightOffset_ = numFilters_ * channels_ * filterSize_ * filterSize_;
-}
-
-void ConvOperator::getConvParams() {
-  numFilters_ = config_.num_filters();
-  const ConvConfig &conf = config_.conv_conf();
-  padding_ = conf.padding();
-  stride_ = conf.stride();
-  filterSize_ = conf.filter_size();
-  paddingY_ = conf.padding_y();
-  strideY_ = conf.stride_y();
-  filterSizeY_ = conf.filter_size_y();
-  filterPixels_ = filterSize_ * filterSizeY_;
-  channels_ = conf.channels();
-  imgSize_ = conf.img_size();
-  imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
-  imgPixels_ = imgSize_ * imgSizeY_;
-  CHECK_EQ(conf.groups(), 1U);
-  filterChannels_ = conf.filter_channels();
-  outputX_ = conf.output_x();
-  outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
-  outputs_ = outputX_ * outputX_;
-}
-
 void ConvOperator::forward() {
   size_t batchSize = ins_[0]->value->getHeight();
   reshape(batchSize);
@@ -264,7 +45,7 @@ void ConvOperator::forward() {
       real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
       real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
       real *outData = out_->value->getData() + outputOffset_ * batchId;
-      hl_convolution_forward(inputDesc_,
+      hl_convolution_forward(imageDesc_,
                              inputData,
                              outputDesc_,
                              outData,
@@ -287,7 +68,7 @@ void ConvOperator::backward() {
     if (ins_[1]->grad) {
       real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
       real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId;
-      hl_convolution_backward_filter(inputDesc_,
+      hl_convolution_backward_filter(imageDesc_,
                                      inputData,
                                      outputDesc_,
                                      outGrad,
@@ -303,7 +84,7 @@ void ConvOperator::backward() {
     if (NULL != preGrad) {
      real *inputGrad = preGrad->getData() + inputOffset_ * batchId;
      real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
-      hl_convolution_backward_data(inputDesc_,
+      hl_convolution_backward_data(imageDesc_,
                                    inputGrad,
                                    outputDesc_,
                                    outGrad,
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvOperator takes two inputs to perform the convolution.
 * The first input is the image, and the second input is the convolution kernel.
 * The heights of the data for the two inputs are the same. Each sample of the
 * first input is convolved with the corresponding sample of the second input
 * independently.
 *
 * The config file api is conv_operator.
 */
class ConvOperator : public ConvBaseOperator {
public:
ConvOperator(const OperatorConfig &config, bool useGpu)
: ConvBaseOperator(config, useGpu) {}
/**
 * The device workspace and cuDNN descriptors are released by the
 * ConvBaseOperator destructor.
 */
virtual ~ConvOperator() {}
virtual void forward();
virtual void backward();
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvTransOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvTransOperator takes two inputs to perform the transposed
 * convolution. The first input is the image, and the second input is the
 * convolution kernel. The heights of the data for the two inputs are the same.
 * Each sample of the first input is convolved with the corresponding sample of
 * the second input independently.
 *
 * The config file api is conv_operator.
 */
REGISTER_OPERATOR(convt, ConvTransOperator);
void ConvTransOperator::forward() {
size_t batchSize = ins_[0]->value->getHeight();
reshape(batchSize);
CHECK_EQ(ins_[1]->value->getHeight(), batchSize);
checkFilterSize(ins_[1]->value);
Matrix::resizeOrCreate(
out_->value, batchSize, imageH_ * imageW_ * channels_, false, useGpu_);
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {
real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
real *outData = out_->value->getData() + outputOffset_ * batchId;
hl_convolution_backward_data(imageDesc_,
outData,
outputDesc_,
inputData,
filterDesc_,
wgtData,
convDesc_,
workSpace_,
workSpaceInBytes_,
bwdDataAlgo_);
}
}
}
void ConvTransOperator::backward() {
size_t batchSize = ins_[0]->value->getHeight();
{
AsyncGpuBlock block;
for (size_t batchId = 0; batchId < batchSize; ++batchId) {
real *outGrad = out_->grad->getData() + outputOffset_ * batchId;
if (ins_[1]->grad) {
real *inputData = ins_[0]->value->getData() + inputOffset_ * batchId;
real *weightGrad = ins_[1]->grad->getData() + weightOffset_ * batchId;
hl_convolution_backward_filter(imageDesc_,
outGrad,
outputDesc_,
inputData,
filterDesc_,
weightGrad,
convDesc_,
workSpace_,
workSpaceInBytes_,
bwdFilterAlgo_);
}
MatrixPtr preGrad = ins_[0]->grad;
if (NULL != preGrad) {
real *inputGrad = preGrad->getData() + inputOffset_ * batchId;
real *wgtData = ins_[1]->value->getData() + weightOffset_ * batchId;
hl_convolution_forward(imageDesc_,
outGrad,
outputDesc_,
inputGrad,
filterDesc_,
wgtData,
convDesc_,
workSpace_,
workSpaceInBytes_,
fwdAlgo_);
}
}
}
}
} // namespace paddle
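ConvTransOperator simply swaps the roles of the cuDNN kernels: its forward pass is the data-backward pass of an ordinary convolution, its data-backward pass is an ordinary forward convolution, and the filter gradient is computed with outGrad and inputData exchanged relative to ConvOperator. A naive single-channel CPU reference for what the forward pass computes, where each input value scatters a scaled copy of the kernel into the output, is sketched below; it is illustrative only, since the operator itself always runs through cuDNN.

#include <vector>

// Single sample, single input/output channel, square kernel, caffe-mode sizes.
// Transposed convolution as scatter-add: out[y*s+ky-p][x*s+kx-p] += in[y][x] * k[ky][kx].
std::vector<float> convTransForward(const std::vector<float> &in, int inH, int inW,
                                    const std::vector<float> &kernel, int k,
                                    int stride, int padding) {
  const int outH = (inH - 1) * stride + k - 2 * padding;
  const int outW = (inW - 1) * stride + k - 2 * padding;
  std::vector<float> out(outH * outW, 0.0f);
  for (int y = 0; y < inH; ++y) {
    for (int x = 0; x < inW; ++x) {
      for (int ky = 0; ky < k; ++ky) {
        for (int kx = 0; kx < k; ++kx) {
          const int oy = y * stride + ky - padding;
          const int ox = x * stride + kx - padding;
          if (oy < 0 || oy >= outH || ox < 0 || ox >= outW) continue;
          out[oy * outW + ox] += in[y * inW + x] * kernel[ky * k + kx];
        }
      }
    }
  }
  return out;
}

With stride 1, no padding, and a 3x3 kernel, a 2x2 input produces a 4x4 output, matching the output_x/imgSize pairs used by the deconv cases of test_ConvUnify below.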
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseOperator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief ConvTransOperator takes two inputs to perform the transposed
 * convolution. The first input is the image, and the second input is the
 * convolution kernel. The heights of the data for the two inputs are the same.
 * Each sample of the first input is convolved with the corresponding sample of
 * the second input independently.
 *
 * The config file api is conv_operator.
 */
class ConvTransOperator : public ConvBaseOperator {
public:
ConvTransOperator(const OperatorConfig &config, bool useGpu)
: ConvBaseOperator(config, useGpu) {}
/**
 * The device workspace and cuDNN descriptors are released by the
 * ConvBaseOperator destructor.
 */
virtual ~ConvTransOperator() {}
virtual void forward();
virtual void backward();
};
} // namespace paddle
@@ -45,22 +45,35 @@ MatrixPtr doOneConvTest(size_t imgSize,
                         size_t groups,
                         MatrixPtr& inputData,
                         real* param,
-                        bool useGpu) {
+                        bool useGpu,
+                        bool isDeconv = false) {
   TestConfig config;
   config.biasSize = numfilters;
+  string layerType;
   if (useGpu) {
-    config.layerConfig.set_type("cudnn_conv");
+    layerType = (isDeconv) ? "cudnn_convt" : "cudnn_conv";
   } else {
-    config.layerConfig.set_type("exconv");
+    layerType = (isDeconv) ? "exconvt" : "exconv";
   }
+  config.layerConfig.set_type(layerType);
   config.layerConfig.set_num_filters(numfilters);
   config.layerConfig.set_partial_sum(1);
   config.layerConfig.set_shared_biases(true);
 
   size_t weightSize = channel * filter_size * filter_size *
                       config.layerConfig.num_filters() / groups;
+  if (isDeconv) {
+    config.inputDefs.push_back(
+        {INPUT_DATA, "layer_0", output_x * output_x * channel, weightSize});
+    config.layerConfig.set_size(imgSize * imgSize *
+                                config.layerConfig.num_filters());
+  } else {
     config.inputDefs.push_back(
         {INPUT_DATA, "layer_0", imgSize * imgSize * channel, weightSize});
+    config.layerConfig.set_size(output_x * output_x *
+                                config.layerConfig.num_filters());
+  }
+
   LayerInputConfig* input = config.layerConfig.add_inputs();
   ConvConfig* conv = input->mutable_conv_conf();
   conv->set_filter_size(filter_size);
@@ -71,12 +84,15 @@ MatrixPtr doOneConvTest(size_t imgSize,
   conv->set_stride(stride);
   conv->set_stride_y(stride);
   conv->set_groups(groups);
-  conv->set_filter_channels(channel / groups);
   conv->set_img_size(imgSize);
   conv->set_output_x(output_x);
-  config.layerConfig.set_size(conv->output_x() * conv->output_x() *
-                              config.layerConfig.num_filters());
+  if (isDeconv) {
+    conv->set_filter_channels(numfilters / groups);
+  } else {
+    conv->set_filter_channels(channel / groups);
+  }
+
   config.layerConfig.set_name("conv");
 
   std::vector<DataLayerPtr> dataLayers;
@@ -104,6 +120,8 @@ MatrixPtr doOneConvTest(size_t imgSize,
 TEST(Layer, convParaUnified) {
 #ifndef PADDLE_ONLY_CPU
   MatrixPtr input, resultCpu, resultGpu;
+
+  /// TEST1 for conv ///
   input = Matrix::create(1, 4 * 4, false, false);
   real inputData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   real param[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1};
@@ -120,7 +138,38 @@ TEST(Layer, convParaUnified) {
                             /*groups*/ 1,
                             input,
                             param,
-                            false);
+                            /*useGpu*/ false);
+
+  resultGpu = doOneConvTest(/* imgSize */ 4,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 3,
+                            /*channel*/ 1,
+                            /*numfilters*/ 2,
+                            /*groups*/ 1,
+                            input,
+                            param,
+                            /*useGpu*/ true);
+  checkMatrixEqual(resultCpu, resultGpu);
+
+  /// TEST1 for deconv ///
+  input = Matrix::create(1, 2 * 2, false, false);
+  real inputDataT[] = {1, 2, 3, 4};
+  input->setData(inputDataT);
+
+  resultCpu = doOneConvTest(/* imgSize */ 4,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 3,
+                            /*channel*/ 1,
+                            /*numfilters*/ 2,
+                            /*groups*/ 1,
+                            input,
+                            param,
+                            /*useGpu*/ false,
+                            /*isDeconv*/ true);
+
   resultGpu = doOneConvTest(/* imgSize */ 4,
                             /* output_x */ 2,
@@ -132,9 +181,11 @@ TEST(Layer, convParaUnified) {
                             /*groups*/ 1,
                             input,
                             param,
-                            true);
+                            /*useGpu*/ true,
+                            /*isDeconv*/ true);
   checkMatrixEqual(resultCpu, resultGpu);
 
+  /// TEST2 for conv ///
   input = Matrix::create(1, 3 * 3 * 2, false, false);
   real inputData2[] = {
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
@@ -152,7 +203,7 @@ TEST(Layer, convParaUnified) {
                             /*groups*/ 1,
                             input,
                             param2,
-                            false);
+                            /*useGpu*/ false);
 
   resultGpu = doOneConvTest(/* imgSize */ 3,
                             /* output_x */ 2,
@@ -164,9 +215,10 @@ TEST(Layer, convParaUnified) {
                             /*groups*/ 1,
                             input,
                             param2,
-                            true);
+                            /*useGpu*/ true);
   checkMatrixEqual(resultCpu, resultGpu);
 
+  /// TEST3 for conv ///
   real param3[] = {1, 2, 3, 4, 4, 3, 2, 1};
 
   resultCpu = doOneConvTest(/* imgSize */ 3,
@@ -179,7 +231,66 @@ TEST(Layer, convParaUnified) {
                             /*groups*/ 2,
                             input,
                             param3,
-                            false);
+                            /*useGpu*/ false);
+
+  resultGpu = doOneConvTest(/* imgSize */ 3,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 2,
+                            /*channel*/ 2,
+                            /*numfilters*/ 2,
+                            /*groups*/ 2,
+                            input,
+                            param3,
+                            /*useGpu*/ true);
+  checkMatrixEqual(resultCpu, resultGpu);
+
+  /// TEST2 for deconv ///
+  input = Matrix::create(1, 2 * 2 * 2, false, false);
+  real inputData2T[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  input->setData(inputData2T);
+
+  resultCpu = doOneConvTest(/* imgSize */ 3,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 2,
+                            /*channel*/ 2,
+                            /*numfilters*/ 2,
+                            /*groups*/ 1,
+                            input,
+                            param2,
+                            /*useGpu*/ false,
+                            /*isDeconv*/ true);
+
+  resultGpu = doOneConvTest(/* imgSize */ 3,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 2,
+                            /*channel*/ 2,
+                            /*numfilters*/ 2,
+                            /*groups*/ 1,
+                            input,
+                            param2,
+                            /*useGpu*/ true,
+                            /*isDeconv*/ true);
+  checkMatrixEqual(resultCpu, resultGpu);
+
+  /// TEST3 for deconv ///
+  resultCpu = doOneConvTest(/* imgSize */ 3,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 2,
+                            /*channel*/ 2,
+                            /*numfilters*/ 2,
+                            /*groups*/ 2,
+                            input,
+                            param3,
+                            /*useGpu*/ false,
+                            /*isDeconv*/ true);
 
   resultGpu = doOneConvTest(/* imgSize */ 3,
                             /* output_x */ 2,
@@ -191,7 +302,8 @@ TEST(Layer, convParaUnified) {
                             /*groups*/ 2,
                             input,
                             param3,
-                            true);
+                            /*useGpu*/ true,
+                            /*isDeconv*/ true);
   checkMatrixEqual(resultCpu, resultGpu);
 #endif
 }
...
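For the deconv cases, doOneConvTest flips the shape bookkeeping: the data layer now carries the small output_x-by-output_x feature map, the layer produces the full imgSize-by-imgSize image per filter, and filter_channels becomes numfilters / groups instead of channel / groups. The arithmetic for the TEST1 deconv configuration, spelled out as a sketch that restates the values above rather than adding test code:

#include <cassert>

int main() {
  // TEST1 deconv configuration from test_ConvUnify.cpp.
  const int imgSize = 4, output_x = 2, filter_size = 3;
  const int channel = 1, numfilters = 2, groups = 1;

  // weightSize is computed the same way for conv and deconv:
  const int weightSize =
      channel * filter_size * filter_size * numfilters / groups;
  assert(weightSize == 18);  // matches the 18-element param[] array

  // Deconv swaps which side is "small": the input layer holds the 2x2 map,
  // and the layer output holds one 4x4 image per filter.
  const int inputSize = output_x * output_x * channel;   // 4
  const int layerSize = imgSize * imgSize * numfilters;  // 32
  const int filterChannels = numfilters / groups;        // 2
  assert(inputSize == 4 && layerSize == 32 && filterChannels == 2);
  return 0;
}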