From c792ef7d5ae470031bebcd990b79c0ce7f36f7bc Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 18 Aug 2017 14:12:01 +0800 Subject: [PATCH] fix DeConv3D, Conv3D --- paddle/gserver/layers/Conv3DLayer.cpp | 248 +++++++++++++----------- paddle/gserver/layers/DeConv3DLayer.cpp | 186 +++++++++--------- 2 files changed, 229 insertions(+), 205 deletions(-) diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 0fa9c5f9f..5609a4cc7 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "Conv3DLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" -#include "Conv3DLayer.h" namespace paddle { @@ -22,32 +22,30 @@ REGISTER_LAYER(conv3d, Conv3DLayer); bool Conv3DLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { - if (!ConvBaseLayer::init(layerMap, parameterMap)) - return false; + if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; int index = 0; for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - M_.push_back(numFilters_ / conf.groups()); - K_.push_back( - conf.filter_channels() * conf.filter_size_z() * \ - conf.filter_size_y() * conf.filter_size()); - weights_[index]->getW()->reshape( - weights_[index]->getW()->getWidth(), - weights_[index]->getW()->getHeight()); + const ConvConfig &conf = inputConfig.conv_conf(); + M_.push_back(numFilters_ / conf.groups()); + K_.push_back(filterPixels_[index] * filterChannels_[index]); + if (nullptr != weights_[index]->getW()) + weights_[index]->getW()->reshape(weights_[index]->getW()->getWidth(), + weights_[index]->getW()->getHeight()); + if (nullptr != weights_[index]->getWGrad()) weights_[index]->getWGrad()->reshape( - weights_[index]->getWGrad()->getWidth(), - weights_[index]->getWGrad()->getHeight()); - ++index; + weights_[index]->getWGrad()->getWidth(), + weights_[index]->getWGrad()->getHeight()); + ++index; } - biases_->getWGrad()->reshape( - biases_->getWGrad()->width_, biases_->getWGrad()->height_); - biases_->getW()->reshape( - biases_->getW()->width_, biases_->getW()->height_); + if (nullptr != biases_->getWGrad()) + biases_->getWGrad()->reshape(biases_->getWGrad()->width_, + biases_->getWGrad()->height_); + if (nullptr != biases_->getW()) + biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } - size_t Conv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); // imgSizeH_.clear(); @@ -59,22 +57,19 @@ size_t Conv3DLayer::getSize() { N_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back(outputSize( - imgSizeW_[i], filterSize_[i], - padding_[i], stride_[i], true)); - outputH_.push_back(outputSize( - imgSizeH_[i], filterSizeY_[i], - paddingY_[i], strideY_[i], true)); - outputD_.push_back(outputSize( - imgSizeD_[i], filterSizeZ_[i], - paddingZ_[i], strideZ_[i], true)); - - N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); - CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); - layerSize += N_[i] * numFilters_; + // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); + outputW_.push_back(outputSize( + imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); + outputH_.push_back(outputSize( + imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); + outputD_.push_back(outputSize( + imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); + + N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); + CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); + layerSize += N_[i] * numFilters_; } getOutput().setFrameHeight(outputH_[0]); getOutput().setFrameWidth(outputW_[0]); @@ -88,38 +83,46 @@ void Conv3DLayer::forward(PassType passType) { int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); int outWidth = getSize(); resetOutput(batchSize, outWidth); - const MatrixPtr outMat = getOutputValue(); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - MatrixPtr wMat = weights_[i]->getW(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - - real *outData = outMat->getData() + n * outWidth; - MatrixPtr outMatSub = - Matrix::create(outData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - MatrixPtr in = colBuf_->subMatrix(g * K, K); - MatrixPtr out = outMatSub->subMatrix(g * M, M); - out->mul(*wMatSub, *in, 1.0, 0.0); - } + REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); + const MatrixPtr &inMat = getInputValue(i); + const MatrixPtr &outMat = getOutputValue(); + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + MatrixPtr wMat = weights_[i]->getW(); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + + real *outData = outMat->getData() + n * outMat->getStride(); + MatrixPtr outMatSub = + Matrix::create(outData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; g++) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr in = colBuf_->subMatrix(g * K, K); + MatrixPtr out = outMatSub->subMatrix(g * M, M); + out->mul(*wMatSub, *in, 1.0, 1.0); } + } } if (nullptr != this->biasParameter_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); - this->addBias(); + REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); + this->addBias(); } forwardActivation(); } @@ -128,20 +131,20 @@ void Conv3DLayer::backward(const UpdateCallback &callback) { backwardActivation(); if (biases_ && biases_->getWGrad()) { - bpropBiases(); - biases_->getParameterPtr()->incUpdate(callback); + bpropBiases(); + biases_->getParameterPtr()->incUpdate(callback); } for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); - if (weights_[i]->getWGrad()) { - bpropWeights(i); - } - if (this->needGradient_) { - bpropData(i); - } - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weights_[i]->getParameterPtr()->incUpdate(callback); + REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); + if (weights_[i]->getWGrad()) { + bpropWeights(i); + } + if (getInputGrad(i)) { + bpropData(i); + } + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); } } @@ -149,28 +152,36 @@ void Conv3DLayer::bpropWeights(int i) { int M = M_[i]; int N = N_[i]; int K = K_[i]; - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); + const MatrixPtr &inMat = getInputValue(i); Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); MatrixPtr wGradMat = weights_[i]->getWGrad(); - real* outGradData = getOutputGrad()->getData(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - for (int n = 0; n < batchSize; ++n) { - colBuf_->vol2Col(inMat->getData() + n * width, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - outGradData += n * getOutputGrad()->getWidth(); - MatrixPtr outGradSub = - Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); - MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); - wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); - } + colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(), + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + + real *outGradData = + getOutputGrad()->getData() + n * getOutputGrad()->getStride(); + MatrixPtr outGradSub = + Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M); + wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0); + } } } @@ -180,45 +191,54 @@ void Conv3DLayer::bpropData(int i) { int K = K_[i]; Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); MatrixPtr wMat = weights_[i]->getW(); - real* outGradData = getOutputGrad()->getData(); - real* preGradData = getInputGrad(i)->getData(); int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); for (int n = 0; n < batchSize; ++n) { - outGradData += n * getOutputGrad()->getWidth(); - preGradData += n * getInputGrad(i)->getWidth(); - MatrixPtr outGradSub = - Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr wMatSub = wMat->subMatrix(g * M, M); - MatrixPtr outG = outGradSub->subMatrix(g * M, M); - MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); - inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); - } - colBuf_->col2Vol(preGradData, channels_[i], - imgSizeD_[i], imgSizeH_[i], imgSizeW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i], - 1.0, 1.0); + real *outGradData = + getOutputGrad()->getData() + n * getOutputGrad()->getStride(); + real *preGradData = + getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); + MatrixPtr outGradSub = + Matrix::create(outGradData, M * groups_[i], N, false, useGpu_); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr wMatSub = wMat->subMatrix(g * M, M); + MatrixPtr outG = outGradSub->subMatrix(g * M, M); + MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K); + inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0); + } + colBuf_->col2Vol(preGradData, + channels_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i], + 1.0, + 1.0); } } void Conv3DLayer::bpropBiases() { MatrixPtr outGradMat = getOutputGrad(); if (this->sharedBiases_) { - biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); + biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); } else { - biases_->getWGrad()->collectBias(*outGradMat, 1.0f); + biases_->getWGrad()->collectBias(*outGradMat, 1.0f); } } void Conv3DLayer::addBias() { MatrixPtr outMat = getOutputValue(); - if (this->sharedBiases_) { - outMat->addSharedBias(*(biases_->getW()), 1.0f); + outMat->addSharedBias(*(biases_->getW()), 1.0f); } else { - outMat->addBias(*(biases_->getW()), 1.0f); + outMat->addBias(*(biases_->getW()), 1.0f); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 8de40b681..286f5b985 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -12,43 +12,42 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "DeConv3DLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" -#include "DeConv3DLayer.h" namespace paddle { REGISTER_LAYER(deconv3d, DeConv3DLayer); #define DECONV_OUTPUT_SIZE(IN_SIZE, STRID, PAD, KSIZE) \ - (((IN_SIZE) - 1) * (STRID) - 2 * (PAD) + (KSIZE)) + (((IN_SIZE)-1) * (STRID)-2 * (PAD) + (KSIZE)) bool DeConv3DLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { + const ParameterMap ¶meterMap) { if (!ConvBaseLayer::init(layerMap, parameterMap)) return false; // for Deconv, the dimension of Kernel is // channel * output * depth * height * weigth // Matrix storage format: (output * depth * height * weigth) x channel for (int index = 0; index < config_.inputs().size(); ++index) { M_.push_back(filterChannels_[index]); - K_.push_back( - filterPixels_[index] * (numFilters_/groups_[index])); - weights_[index]->getW()->reshape( - filterPixels_[index] * numFilters_, - filterChannels_[index]); - weights_[index]->getWGrad()->reshape( - filterPixels_[index] * numFilters_, - filterChannels_[index]); + K_.push_back(filterPixels_[index] * (numFilters_ / groups_[index])); + if (weights_[index]->getW()) + weights_[index]->getW()->reshape(filterPixels_[index] * numFilters_, + filterChannels_[index]); + if (weights_[index]->getWGrad()) + weights_[index]->getWGrad()->reshape(filterPixels_[index] * numFilters_, + filterChannels_[index]); } - biases_->getWGrad()->reshape( - biases_->getWGrad()->width_, biases_->getWGrad()->height_); - biases_->getW()->reshape( - biases_->getW()->width_, biases_->getW()->height_); + if (biases_->getWGrad()) + biases_->getWGrad()->reshape(biases_->getWGrad()->width_, + biases_->getWGrad()->height_); + if (biases_->getW()) + biases_->getW()->reshape(biases_->getW()->width_, biases_->getW()->height_); CHECK(inputLayers_.size() == parameters_.size()); return true; } - size_t DeConv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); // imgSizeH_.clear(); @@ -64,18 +63,12 @@ size_t DeConv3DLayer::getSize() { // imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); // imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); // imgSizeD_.push_back(inputLayers_[i]->getOutput().getFrameDepth()); - outputW_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeW_[i], stride_[i], - padding_[i], filterSize_[i])); - outputH_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeH_[i], strideY_[i], - paddingY_[i], filterSizeY_[i])); - outputD_.push_back( - DECONV_OUTPUT_SIZE( - imgSizeD_[i], strideZ_[i], - paddingZ_[i], filterSizeZ_[i])); + outputW_.push_back(DECONV_OUTPUT_SIZE( + imgSizeW_[i], stride_[i], padding_[i], filterSize_[i])); + outputH_.push_back(DECONV_OUTPUT_SIZE( + imgSizeH_[i], strideY_[i], paddingY_[i], filterSizeY_[i])); + outputD_.push_back(DECONV_OUTPUT_SIZE( + imgSizeD_[i], strideZ_[i], paddingZ_[i], filterSizeZ_[i])); No_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); @@ -96,32 +89,37 @@ void DeConv3DLayer::forward(PassType passType) { for (size_t i = 0; i != inputLayers_.size(); ++i) { REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); - const MatrixPtr& inMat = getInputValue(i); - int width = inMat->getWidth(); + const MatrixPtr &inMat = getInputValue(i); int M = M_[i]; int N = N_[i]; int K = K_[i]; MatrixPtr wMat = weights_[i]->getW(); - Matrix::resizeOrCreate(colBuf_, K * groups_[i] , N, false, useGpu_); - + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); for (int n = 0; n < batchSize; ++n) { - real *inData = inMat->getData() + n * width; - real *colBufData = colBuf_->getData(); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr wMatSub = wMat->subMatrix(g * K, K); - MatrixPtr inMatSub = - Matrix::create(inData, M, N, false, useGpu_); - MatrixPtr colBufDataSub = - Matrix::create(colBufData, K, N, false, useGpu_); - colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); - colBufData += K * N; - inData += M * N; + real *inData = inMat->getData() + n * inMat->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); + MatrixPtr wMatSub = wMat->subMatrix(g * K, K); + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0); + inData += M * N; } - colBuf_->col2Vol(outMat->getData()+ n * outMat->getWidth(), - numFilters_, outputD_[i], outputH_[i], outputW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i], 1.0, 1.0); + colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(), + numFilters_, + outputD_[i], + outputH_[i], + outputW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i], + 1.0, + 1.0); } } if (nullptr != this->biasParameter_) { @@ -134,63 +132,69 @@ void DeConv3DLayer::forward(PassType passType) { void DeConv3DLayer::backward(const UpdateCallback &callback) { backwardActivation(); int batchSize = getOutputGrad()->getHeight(); - int outputWidth = getOutputGrad()->getWidth(); if (biases_ && biases_->getWGrad()) { bpropBiases(); biases_->getParameterPtr()->incUpdate(callback); } - for (size_t i =0; i < inputLayers_.size(); ++i) { - int M = M_[i]; - int N = N_[i]; - int K = K_[i]; - Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); - const MatrixPtr& inMat = getInputValue(i); - for (int n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < inputLayers_.size(); ++i) { + if (weights_[i]->getWGrad() || this->needGradient_) { + int M = M_[i]; + int N = N_[i]; + int K = K_[i]; REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); - if (weights_[i]->getWGrad() || this->needGradient_) { - colBuf_->vol2Col(getOutputGrad()->getData() + n * outputWidth, - numFilters_, outputD_[i], outputH_[i], outputW_[i], - filterSizeZ_[i], filterSizeY_[i], filterSize_[i], - strideZ_[i], strideY_[i], stride_[i], - paddingZ_[i], paddingY_[i], padding_[i]); - } - if (weights_[i]->getWGrad()) { - real *inData = inMat->getData() + n * inMat->getWidth();; - real *wGradData = weights_[i]->getWGrad()->getData(); - for (int g = 0; g < groups_[i]; g++) { - MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); - MatrixPtr inMatSub = Matrix::create( - inData, M, N, false, useGpu_); - MatrixPtr wGradMatSub = Matrix::create( - wGradData, K, M, false, useGpu_); - wGradMatSub->mul(*colBufDataSub, - *(inMatSub->getTranspose()), 1.0, 1.0); - wGradData += K * M; - inData += M * N; + Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); + const MatrixPtr &inMat = getInputValue(i); + for (int n = 0; n < batchSize; ++n) { + colBuf_->vol2Col( + getOutputGrad()->getData() + n * getOutputGrad()->getStride(), + numFilters_, + outputD_[i], + outputH_[i], + outputW_[i], + filterSizeZ_[i], + filterSizeY_[i], + filterSize_[i], + strideZ_[i], + strideY_[i], + stride_[i], + paddingZ_[i], + paddingY_[i], + padding_[i]); + if (weights_[i]->getWGrad()) { + real *inData = inMat->getData() + n * inMat->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K); + MatrixPtr wGradMatSub = + weights_[i]->getWGrad()->subMatrix(g * K, K); + MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_); + wGradMatSub->mul( + *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0); + inData += M * N; + } } - weights_[i]->getParameterPtr()->incUpdate(callback); - } - if (this->needGradient_) { - real* preGrad = getInputGrad(i)->getData(); - for (int g = 0; g < groups_[i]; ++g) { - MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); - MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); - MatrixPtr inGradMatSub = Matrix::create( - preGrad, M, N, false, useGpu_); - inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 0.0); - preGrad += M * N; + if (getInputGrad(i)) { + real *preGrad = + getInputGrad(i)->getData() + n * getInputGrad(i)->getStride(); + for (int g = 0; g < groups_[i]; ++g) { + MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K); + MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K); + MatrixPtr inGradMatSub = + Matrix::create(preGrad, M, N, false, useGpu_); + inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0); + preGrad += M * N; + } } } REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weights_[i]->getParameterPtr()->incUpdate(callback); } } } - -void DeConv3DLayer::bpropWeights(int i) { } -void DeConv3DLayer::bpropData(int i) { } +void DeConv3DLayer::bpropWeights(int i) {} +void DeConv3DLayer::bpropData(int i) {} void DeConv3DLayer::bpropBiases() { - MatrixPtr outGradMat = getOutputGrad(); + const MatrixPtr &outGradMat = getOutputGrad(); if (this->sharedBiases_) { biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f); -- GitLab