diff --git a/doc/source/gserver/layers/layer.rst b/doc/source/gserver/layers/layer.rst index 807b22ca140ee71208a96e2877b9c5636620b165..4b8e149505f0695ad2fa4be967a50d1a0ac48b43 100644 --- a/doc/source/gserver/layers/layer.rst +++ b/doc/source/gserver/layers/layer.rst @@ -465,6 +465,11 @@ SumOfSquaresCostLayer .. doxygenclass:: paddle::SumOfSquaresCostLayer :members: +SumCostLayer +````````````````````` +.. doxygenclass:: paddle::SumCostLayer + :members: + CosSimLayer ----------- .. doxygenclass:: paddle::CosSimLayer diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst index c78682423e448a472ca46f2cb100a40efface6eb..56b23640205ec8c7575541bd270720fb861457a1 100644 --- a/doc/ui/api/trainer_config_helpers/layers.rst +++ b/doc/ui/api/trainer_config_helpers/layers.rst @@ -46,6 +46,12 @@ conv_operator :members: conv_operator :noindex: +conv_projection +------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: conv_projection + :noindex: + conv_shift_layer ------------------ .. automodule:: paddle.trainer_config_helpers.layers @@ -71,6 +77,12 @@ img_pool_layer -------------- .. automodule:: paddle.trainer_config_helpers.layers :members: img_pool_layer + :noindex: + +spp_layer +-------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: spp_layer :noindex: maxout_layer @@ -254,6 +266,12 @@ expand_layer :members: expand_layer :noindex: +repeat_layer +------------ +.. automodule:: paddle.trainer_config_helpers.layers + :members: repeat_layer + :noindex: + Math Layers =========== @@ -401,6 +419,12 @@ hsigmoid :members: hsigmoid :noindex: +sum_cost +--------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: sum_cost + :noindex: + Check Layer ============ diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index ac35727ac28c7ee7a41d7d3d93d7c18288950a41..70b5be6fda2509853029a68d31129df28d580942 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -91,6 +91,7 @@ extern void hl_expand_feature2col( * @param[in] paddingH padding height. * @param[in] paddingW padding width. * @param[out] tgtData output data. + * @param[in] tgtStride stride between output data samples. * */ extern void hl_maxpool_forward( @@ -100,7 +101,8 @@ extern void hl_maxpool_forward( const int pooledH, const int pooledW, const int sizeX, const int sizeY, const int strideH, const int strideW, - const int paddingH, const int paddingW, real* tgtData); + const int paddingH, const int paddingW, + real* tgtData, const int tgtStride); /** * @brief Maximum pool backward. @@ -123,6 +125,7 @@ extern void hl_maxpool_forward( * @param[in] paddingH padding height. * @param[in] paddingW padding width. * @param[out] targetGrad output grad. + * @param[in] outStride stride between output data samples. * */ extern void hl_maxpool_backward( @@ -135,7 +138,7 @@ extern void hl_maxpool_backward( const int strideH, const int strideW, const int paddingH, const int paddingW, real scaleA, real scaleB, - real* targetGrad); + real* targetGrad, const int outStride); /** * @brief Averge pool forward. @@ -154,6 +157,7 @@ extern void hl_maxpool_backward( * @param[in] paddingH padding height. * @param[in] paddingW padding width. * @param[out] tgtData output data. + * @param[in] tgtStride stride between output data samples. 
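The new `tgtStride` / `outStride` arguments let the pooling kernels read and write matrices whose row stride is larger than the per-sample width, for example a column slice of a wider output matrix (the spatial pyramid pooling layer added later in this change relies on exactly that). Below is a minimal standalone C++ sketch of the index mapping the CUDA kernels use; `frameNum`, `tgtStride` and the modulo expression mirror the kernel code, everything else is illustrative.

```cpp
#include <cstddef>

// Illustrative sketch (not the CUDA kernel): map a flat per-frame output
// index to a location in a strided target matrix, mirroring
//   tgtIndex = index % (pooledW * pooledH * channels) + frameNum * tgtStride
// from KeMaxPoolForward / KeAvgPoolForward.
inline size_t stridedTargetIndex(size_t index, size_t pooledH, size_t pooledW,
                                 size_t channels, size_t tgtStride) {
  size_t perFrame = pooledH * pooledW * channels;  // elements per sample
  size_t frameNum = index / perFrame;              // which sample (row)
  size_t offsetInFrame = index % perFrame;         // position inside the row
  return frameNum * tgtStride + offsetInFrame;     // row stride may exceed perFrame
}
```

When `tgtStride` equals `pooledH * pooledW * channels`, this reduces to writing `tgtData[index]` directly, i.e. the previous behaviour.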
* */ extern void hl_avgpool_forward( @@ -163,7 +167,8 @@ extern void hl_avgpool_forward( const int pooledH, const int pooledW, const int sizeX, const int sizeY, const int strideH, const int strideW, - const int paddingH, const int paddingW, real* tgtData); + const int paddingH, const int paddingW, + real* tgtData, const int tgtStride); /** * @brief Maximum pool backward. @@ -184,6 +189,7 @@ extern void hl_avgpool_forward( * @param[in] scaleA scale. * @param[in] scaleB scale. * @param[out] backGrad output grad. + * @param[in] outStride stride between output data samples. * */ extern void hl_avgpool_backward( @@ -195,7 +201,7 @@ extern void hl_avgpool_backward( const int strideH, const int strideW, int paddingH, int paddingW, real scaleA, real scaleB, - real* backGrad); + real* backGrad, const int outStride); /** * @brief Cross-map-respose normalize forward. diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 50fddce584252b459b1146b8528e2918416aff95..c6f32ad337705ff938b7b370a4785dc7f4393041 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -44,7 +44,8 @@ inline void hl_maxpool_forward( const int pooledH, const int pooledW, const int sizeX, const int sizeY, const int strideH, const int strideW, - const int paddingH, const int paddingW, real* tgtData) {} + const int paddingH, const int paddingW, + real* tgtData, const int tgtStride) {} inline void hl_maxpool_backward( const int frameCnt, const real* inputData, @@ -56,7 +57,7 @@ inline void hl_maxpool_backward( const int strideH, const int strideW, const int paddingH, const int paddingW, real scaleA, real scaleB, - real* targetGrad) {} + real* targetGrad, const int outStride) {} inline void hl_avgpool_forward( const int frameCnt, const real* inputData, @@ -65,7 +66,8 @@ inline void hl_avgpool_forward( const int pooledH, const int pooledW, const int sizeX, const int sizeY, const int strideH, const int strideW, - const int paddingH, const int paddingW, real* tgtData) {} + const int paddingH, const int paddingW, + real* tgtData, const int tgtStride) {} inline void hl_avgpool_backward( const int frameCnt, const real* outGrad, @@ -76,7 +78,7 @@ inline void hl_avgpool_backward( const int strideH, const int strideW, int paddingH, int paddingW, real scaleA, real scaleB, - real* backGrad) {} + real* backGrad, const int outStride) {} inline void hl_CMRNorm_forward( size_t frameCnt, const real* in, real* scale, real* out, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 9eec44f77f27a0bf29d3b68260d663a5687d1b0c..ae387a8bc0e0791995810df9e5f2556264d869b1 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData, const int ksizeW, const int ksizeH, const int strideH, const int strideW, const int offsetH, const int offsetW, - real* tgtData) { + real* tgtData, const int tgtStride) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int pw = index % pooledW; @@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData, maxval = inputData[h * width + w]; } } - tgtData[index] = maxval; + int tgtIndex = index % (pooledW * pooledH * channels) + + frameNum * tgtStride; + tgtData[tgtIndex] = maxval; } } @@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData, const int sizeX, const int sizeY, const int strideH, const int strideW, const 
int paddingH, const int paddingW, - real* tgtData) { + real* tgtData, const int tgtStride) { int num_kernels = pooledH * pooledW * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; @@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData, KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>> (num_kernels, inputData, channels, height, width, pooledH, pooledW, sizeX, sizeY, strideH, strideW, - paddingH, paddingW, tgtData); + paddingH, paddingW, tgtData, tgtStride); CHECK_SYNC("hl_maxpool_forward failed"); } @@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData, const int strideH, const int strideW, const int padH, const int padW, real scaleA, real scaleB, - real* targetGrad) { + real* targetGrad, const int outStride) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { // find out the local index @@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData, int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0; real gradient = 0; real input = inputData[index]; - outData += (frameNum * channels + offsetC) * pooledH * pooledW; - outGrad += (frameNum * channels + offsetC) * pooledH * pooledW; + outData += (frameNum * outStride + offsetC * pooledH * pooledW); + outGrad += (frameNum * outStride + offsetC * pooledH * pooledW); for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { if (input == outData[ph * pooledW + pw]) { @@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData, const int strideH, const int strideW, const int paddingH, const int paddingW, real scaleA, real scaleB, - real* targetGrad) { + real* targetGrad, const int outStride) { int num_kernels = height * width * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; @@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData, strideH, strideW, paddingH, paddingW, scaleA, scaleB, - targetGrad); + targetGrad, outStride); CHECK_SYNC("hl_maxpool_backward"); } @@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData, const int sizeX, const int sizeY, const int strideH, const int strideW, const int padH, const int padW, - real* tgtData) { + real* tgtData, const int tgtStride) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int pw = index % pooledW; @@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData, aveval += inputData[h * width + w]; } } - tgtData[index] = aveval / pool_size; + int tgtIndex = index % (pooledW * pooledH * channels) + + frameNum * tgtStride; + tgtData[tgtIndex] = aveval / pool_size; } } @@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData, const int pooledH, const int pooledW, const int sizeX, const int sizeY, const int strideH, const int strideW, - const int paddingH, const int paddingW, real* tgtData) { + const int paddingH, const int paddingW, + real* tgtData, const int tgtStride) { int num_kernels = pooledH * pooledW * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>> (num_kernels, inputData, channels, height, width, pooledH, pooledW, sizeX, sizeY, strideH, strideW, - paddingH, paddingW, tgtData); + paddingH, paddingW, tgtData, tgtStride); CHECK_SYNC("hl_avgpool_forward failed"); } @@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const 
int nthreads, const real* outGrad, const int strideH, const int strideW, const int padH, const int padW, real scaleA, real scaleB, - real* tgtGrad) { + real* tgtGrad, const int outStride) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int offsetW = index % width + padW; @@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad, int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0; int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0; real gradient = 0; - outGrad += (frameNum * channels + offsetC) * pooledH * pooledW; + outGrad += (frameNum * outStride + offsetC * pooledH * pooledW); + for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { @@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad, const int strideH, const int strideW, const int paddingH, const int paddingW, real scaleA, real scaleB, - real* backGrad) { + real* backGrad, const int outStride) { int num_kernels = height * width * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; @@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad, strideH, strideW, paddingH, paddingW, scaleA, scaleB, - backGrad); + backGrad, outStride); CHECK_SYNC("hl_avgpool_backward failed"); } diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 14ff8510f7b19dc24b7b1ba603485488ddd4979d..949788be497874a5bb34e49e11bdc8ba3205ba61 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -562,4 +562,39 @@ void HuberTwoClass::backwardImpIn( } } +/** + * This cost layer compute the sum of its input as loss. + * \f[ + * o(i) = \sum_{j=1}^D y_{ij} + * \f] + */ +class SumCostLayer : public Layer { +public: + explicit SumCostLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool ret = Layer::init(layerMap, parameterMap); + if (!ret) return ret; + CHECK_EQ(inputLayers_.size(), 1UL); + return true; + } + + virtual void forward(PassType passType) { + Layer::forward(passType); + const MatrixPtr& input = getInputValue(0); + + /* malloc memory for the output_ if necessary */ + int batchSize = input->getHeight(); + int size = 1; + resizeOutput(batchSize, size); + output_.value->sumRows(*input); + } + + virtual void backward(const UpdateCallback& callback = nullptr) { + getInputGrad(0)->add((real)1); + } +}; + +REGISTER_LAYER(sum_cost, SumCostLayer); + } // namespace paddle diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index b464e16737ae561dce6e7d4f16a4dd61f73204e0..f263c688213ae6a83d5db4a1025aa252344dfab8 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -129,7 +129,7 @@ protected: * This cost layer compute Euclidean (L2) loss for real-valued regression * tasks. 
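For reference, the new `SumCostLayer` above treats the sum of each input row as the cost, so the gradient of the cost with respect to every input element is 1; that is why its `backward()` only calls `getInputGrad(0)->add((real)1)`. An illustrative standalone sketch of the same forward/backward arithmetic (not the Paddle classes themselves):

```cpp
#include <vector>

// Sketch of the sum cost: o(i) = sum_j y_ij, hence d o(i) / d y_ij = 1 for
// every element, so the backward pass just adds 1 to the input gradient.
void sumCostForwardBackward(const std::vector<std::vector<float>>& input,
                            std::vector<float>& cost,
                            std::vector<std::vector<float>>& inputGrad) {
  cost.assign(input.size(), 0.0f);
  for (size_t i = 0; i < input.size(); ++i) {
    for (float v : input[i]) cost[i] += v;    // forward: row sum
    for (float& g : inputGrad[i]) g += 1.0f;  // backward: gradient is 1
  }
}
```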
* \f[ - * L = \frac{1}{2N} \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2} + * L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2} * \f] */ class SumOfSquaresCostLayer : public CostLayer { diff --git a/paddle/gserver/layers/PoolLayer.cpp b/paddle/gserver/layers/PoolLayer.cpp index 7fc27ac0bd8e05246d87bac0e9692d8496f6601f..2fbc9001f11613cd987e3815f6f31caa8f9979cf 100644 --- a/paddle/gserver/layers/PoolLayer.cpp +++ b/paddle/gserver/layers/PoolLayer.cpp @@ -52,10 +52,8 @@ bool PoolLayer::init(const LayerMap& layerMap, Layer* PoolLayer::create(const LayerConfig& config) { CHECK_EQ(config.inputs_size(), 1); const std::string& pool = config.inputs(0).pool_conf().pool_type(); - if (pool == "max-projection") { - return new MaxPoolProjectionLayer(config); - } else if (pool == "avg-projection") { - return new AvgPoolProjectionLayer(config); + if (pool == "max-projection" || pool == "avg-projection") { + return new PoolProjectionLayer(config); #ifndef PADDLE_ONLY_CPU } else if (CudnnPoolLayer::typeCheck(pool)) { return new CudnnPoolLayer(config); diff --git a/paddle/gserver/layers/PoolProjection.cpp b/paddle/gserver/layers/PoolProjection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9be5aba3d57d23e462c9ea3608491606f988c35f --- /dev/null +++ b/paddle/gserver/layers/PoolProjection.cpp @@ -0,0 +1,123 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "PoolProjection.h" + +namespace paddle { + +REGISTER_PROJECTION_CREATE_FUNC(pool, &PoolProjection::create); + +PoolProjection::PoolProjection(const ProjectionConfig& config, + ParameterPtr parameter, bool useGpu) + : Projection(config, parameter, useGpu) { + const PoolConfig& conf = config_.pool_conf(); + poolType_ = conf.pool_type(); + channels_ = conf.channels(); + sizeX_ = conf.size_x(); + stride_ = conf.stride(); + outputX_ = conf.output_x(); + imgSize_ = conf.img_size(); + confPadding_ = conf.padding(); + + sizeY_ = conf.has_size_y() ? conf.size_y() : conf.size_x(); + imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); + strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); + confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); + outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); +} + +size_t PoolProjection::getSize() { + imgSizeY_ = in_->getFrameHeight(); + imgSize_ = in_->getFrameWidth(); + const PoolConfig& conf = config_.pool_conf(); + if (imgSizeY_ == 0) { + imgSizeY_ = conf.has_img_size_y() ? 
conf.img_size_y() : conf.img_size(); + } + if (imgSize_ == 0) { + imgSize_ = conf.img_size(); + } + outputY_ = outputSize(imgSizeY_, sizeY_, confPaddingY_, strideY_, + /* caffeMode */ false); + outputX_ = outputSize(imgSize_, sizeX_, confPadding_, stride_, + /* caffeMode */ false); + + const_cast(out_)->setFrameHeight(outputY_); + const_cast(out_)->setFrameWidth(outputX_); + + return outputY_ * outputX_ * channels_; +} + +PoolProjection* PoolProjection::create(const ProjectionConfig& config, + ParameterPtr parameter, bool useGpu) { + const std::string& pool = config.pool_conf().pool_type(); + if (pool == "max-projection") { + return new MaxPoolProjection(config, parameter, useGpu); + } else if (pool == "avg-projection") { + return new AvgPoolProjection(config, parameter, useGpu); + } else { + LOG(FATAL) << "Unknown pool type: " << pool; + return nullptr; + } +} + +void MaxPoolProjection::forward() { + size_t width = getSize(); + CHECK_EQ(width, out_->value->getWidth()); + MatrixPtr inputV = in_->value; + MatrixPtr outV = out_->value; + outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_, + strideY_, stride_, outputY_, outputX_, confPaddingY_, + confPadding_); +} + +void MaxPoolProjection::backward(const UpdateCallback& callback) { + (void)callback; + MatrixPtr outGrad = out_->grad; + MatrixPtr inputV = in_->value; + MatrixPtr outV = out_->value; + MatrixPtr inputGrad = in_->grad; + + if (NULL == inputGrad) { + return; + } + inputGrad->maxPoolBackward(*inputV, imgSizeY_, imgSize_, *outGrad, *outV, + sizeX_, sizeY_, strideY_, stride_, outputY_, + outputX_, 1, 1, confPaddingY_, confPadding_); +} + +void AvgPoolProjection::forward() { + size_t width = getSize(); + CHECK_EQ(width, out_->value->getWidth()); + MatrixPtr inputV = in_->value; + MatrixPtr outV = out_->value; + outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_, + strideY_, stride_, outputY_, outputX_, confPaddingY_, + confPadding_); +} + +void AvgPoolProjection::backward(const UpdateCallback& callback) { + (void)callback; + + MatrixPtr outputGrad = out_->grad; + MatrixPtr inputGrad = in_->grad; + + if (NULL == inputGrad) { + return; + } + + inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_, sizeX_, sizeY_, + strideY_, stride_, outputY_, outputX_, 1, 1, + confPaddingY_, confPadding_); +} +} // namespace paddle diff --git a/paddle/gserver/layers/PoolProjection.h b/paddle/gserver/layers/PoolProjection.h new file mode 100644 index 0000000000000000000000000000000000000000..a11e25b729cb7afabdb3547326f269e54ddf42da --- /dev/null +++ b/paddle/gserver/layers/PoolProjection.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
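`PoolProjection::getSize()` above derives `outputY_` / `outputX_` through `outputSize(...)` from `paddle/math/MathUtils.h`, called with `caffeMode = false`. The sketch below shows the window-count arithmetic that call is assumed to perform (ceil-style rounding when `caffeMode` is false, floor-style otherwise); the helper name `poolOutputSize` is made up for the example.

```cpp
// Sketch of the assumed pooling output-size arithmetic: caffeMode uses floor
// division, while the non-caffeMode branch used here rounds up so border
// pixels that do not fill a whole window still produce one output.
inline int poolOutputSize(int imageSize, int windowSize, int padding,
                          int stride, bool caffeMode) {
  if (caffeMode) {
    return (imageSize - windowSize + 2 * padding) / stride + 1;
  }
  return (imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
}
```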
*/ + +#pragma once + +#include "Projection.h" +#include "paddle/math/MathUtils.h" + +namespace paddle { + +class PoolProjection : public Projection { +protected: + size_t imgSizeY_, imgSize_; + size_t outputY_, outputX_; + size_t strideY_, stride_; + size_t sizeY_, sizeX_; + int confPaddingY_, confPadding_; + size_t channels_; + std::string poolType_; + +public: + PoolProjection(const ProjectionConfig& config, ParameterPtr parameter, + bool useGpu); + + static PoolProjection* create(const ProjectionConfig& config, + ParameterPtr parameter, bool useGpu); + + const std::string& getPoolType() const { return poolType_; } + + size_t getSize(); +}; + +class MaxPoolProjection : public PoolProjection { +public: + MaxPoolProjection(const ProjectionConfig& config, ParameterPtr parameter, + bool useGpu) + : PoolProjection(config, parameter, useGpu) {} + + virtual void forward(); + virtual void backward(const UpdateCallback& callback = nullptr); +}; + +class AvgPoolProjection : public PoolProjection { +public: + AvgPoolProjection(const ProjectionConfig& config, ParameterPtr parameter, + bool useGpu) + : PoolProjection(config, parameter, useGpu) {} + + virtual void forward(); + virtual void backward(const UpdateCallback& callback = nullptr); +}; +} // namespace paddle diff --git a/paddle/gserver/layers/PoolProjectionLayer.cpp b/paddle/gserver/layers/PoolProjectionLayer.cpp index 9e8ce778501bbc1f91bfad6d3ab7eb5b1b6f4c80..cabb346d6c99178f7c8ce049d495785c0a488173 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.cpp +++ b/paddle/gserver/layers/PoolProjectionLayer.cpp @@ -18,6 +18,7 @@ limitations under the License. */ namespace paddle { + size_t PoolProjectionLayer::getSize() { CHECK_EQ(inputLayers_.size(), 1UL); size_t layerSize = 0; @@ -37,74 +38,23 @@ size_t PoolProjectionLayer::getSize() { layerSize = outputH_ * outputW_ * channels_; - getOutput().setFrameHeight(outputH_); - getOutput().setFrameWidth(outputW_); return layerSize; } -void MaxPoolProjectionLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one ROW */ - MatrixPtr input = getInputValue(0); - int batchSize = input->getHeight(); - int size = getSize(); - resetOutput(batchSize, size); - - MatrixPtr outV = getOutputValue(); - - outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_, - strideY_, stride_, outputH_, outputW_, confPaddingY_, - confPadding_); -} - -void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { - (void)callback; - - if (NULL == getInputGrad(0)) { - return; - } - - /* Do derivation */ - MatrixPtr outGrad = getOutputGrad(); - MatrixPtr inputV = getInputValue(0); - MatrixPtr outV = getOutputValue(); - MatrixPtr inputGrad = getInputGrad(0); - - inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV, - sizeX_, sizeY_, strideY_, stride_, outputH_, - outputW_, 1, 1, confPaddingY_, confPadding_); -} - -void AvgPoolProjectionLayer::forward(PassType passType) { +void PoolProjectionLayer::forward(PassType passType) { Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one ROW */ - MatrixPtr input = getInputValue(0); - int batchSize = input->getHeight(); + const Argument& in = getInput(0); + int batchSize = in.value->getHeight(); int size = getSize(); resetOutput(batchSize, size); - - MatrixPtr outV = getOutputValue(); - - outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_, - strideY_, 
stride_, outputH_, outputW_, confPaddingY_, - confPadding_); + poolProjection_->forward(&in, &output_, passType); } -void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { +void PoolProjectionLayer::backward(const UpdateCallback& callback) { (void)callback; - if (NULL == getInputGrad(0)) { return; } - /* Do derivation */ - MatrixPtr outputGrad = getOutputGrad(); - MatrixPtr inputGrad = getInputGrad(0); - inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, sizeY_, - strideY_, stride_, outputH_, outputW_, 1, 1, - confPaddingY_, confPadding_); + poolProjection_->backward(callback); } } // namespace paddle diff --git a/paddle/gserver/layers/PoolProjectionLayer.h b/paddle/gserver/layers/PoolProjectionLayer.h index 42bbc83c62246dfc8e69aa0b427b27819a701eb6..777b6f39e7cc4ebaa7078ce3378b2688363245e8 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.h +++ b/paddle/gserver/layers/PoolProjectionLayer.h @@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #pragma once +#include #include "PoolLayer.h" +#include "PoolProjection.h" #include "paddle/math/Matrix.h" -#include namespace paddle { /** @@ -27,33 +27,18 @@ class PoolProjectionLayer : public PoolLayer { protected: size_t imgSizeH_, imgSizeW_; size_t outputH_, outputW_; + std::unique_ptr poolProjection_; + ProjectionConfig projectionConfig_; public: - size_t getSize(); - explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {} -}; -/** - * @brief A layer for max pooling - */ -class MaxPoolProjectionLayer : public PoolProjectionLayer { -public: - explicit MaxPoolProjectionLayer(const LayerConfig& config) - : PoolProjectionLayer(config) {} - - ~MaxPoolProjectionLayer() {} + explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) { + PoolConfig* conf = projectionConfig_.mutable_pool_conf(); + *conf = config_.inputs(0).pool_conf(); + poolProjection_.reset( + PoolProjection::create(projectionConfig_, nullptr, useGpu_)); + } - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); -}; -/** - * @brief A layer for average pooling - */ -class AvgPoolProjectionLayer : public PoolProjectionLayer { -public: - explicit AvgPoolProjectionLayer(const LayerConfig& config) - : PoolProjectionLayer(config) {} - - ~AvgPoolProjectionLayer() {} + size_t getSize(); virtual void forward(PassType passType); virtual void backward(const UpdateCallback& callback = nullptr); diff --git a/paddle/gserver/layers/Projection.h b/paddle/gserver/layers/Projection.h index 3fa3a0cc230ac4c8616abe0eb2c8ac41bde52d53..203edc5396a53cf72dcad6308335ba4731ba49bc 100644 --- a/paddle/gserver/layers/Projection.h +++ b/paddle/gserver/layers/Projection.h @@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
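The layer-level refactor above removes the duplicated max/avg code paths: `PoolProjectionLayer` now builds one `PoolProjection` from its `pool_conf` in the constructor and forwards both passes to it. A minimal, illustrative sketch of that ownership/delegation shape (generic names, not the Paddle classes):

```cpp
#include <memory>

// Illustrative shape of the refactor: the layer owns one projection object
// chosen from the pool type in its config and simply forwards both passes.
struct ProjectionSketch {
  virtual ~ProjectionSketch() = default;
  virtual void forward() = 0;
  virtual void backward() = 0;
};

struct PoolLayerSketch {
  std::unique_ptr<ProjectionSketch> proj;  // created once from the config
  void forward() { proj->forward(); }      // no pooling code left in the layer
  void backward() { proj->backward(); }
};
```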
*/ - #pragma once -#include "paddle/parameter/Parameter.h" -#include "ModelConfig.pb.h" #include "Layer.h" +#include "ModelConfig.pb.h" +#include "paddle/parameter/Parameter.h" namespace paddle { @@ -28,6 +27,11 @@ namespace paddle { Projection::registrar_.registerClass<__class_name>(#__type_name); \ }) +#define REGISTER_PROJECTION_CREATE_FUNC(__type_name, createFunction) \ + static InitFunction __reg_type_##__type_name([]() { \ + Projection::registrar_.registerClass(#__type_name, createFunction); \ + }) + /** * A projection takes one Argument as input, calculate the result and add it * to output Argument. @@ -50,7 +54,8 @@ public: registrar_; /** - * Forward propagation. If backward() will be called, in and out must be kept valid until then. + * Forward propagation. If backward() will be called, in and out must be kept + * valid until then. * @param in input of projection * @param out output of projection * @param passType PASS_TRAIN of PASS_TEST diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..582abf78c84a4e1bce87f78f2abbd01620bd1d9c --- /dev/null +++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp @@ -0,0 +1,130 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "SpatialPyramidPoolLayer.h" + +namespace paddle { + +REGISTER_LAYER(spp, SpatialPyramidPoolLayer); + +ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW, + size_t imgSizeH, + size_t channels, + size_t pyramidLevel, + std::string& poolType) { + ProjectionConfig config; + config.set_type("pool"); + PoolConfig* conf = config.mutable_pool_conf(); + conf->set_channels(channels); + conf->set_img_size(imgSizeW); + conf->set_img_size_y(imgSizeH); + conf->set_pool_type(poolType); + + int numBins = std::pow(2, pyramidLevel); + + int sizeH = std::ceil(imgSizeH / static_cast(numBins)); + int paddingH = (sizeH * numBins - imgSizeH + 1) / 2; + int outSizeH = outputSize(imgSizeH, sizeH, paddingH, sizeH, true); + + int sizeW = std::ceil(imgSizeW / static_cast(numBins)); + int paddingW = (sizeW * numBins - imgSizeW + 1) / 2; + int outSizeW = outputSize(imgSizeW, sizeW, paddingW, sizeW, true); + + conf->set_stride(sizeW); + conf->set_stride_y(sizeH); + conf->set_size_x(sizeW); + conf->set_size_y(sizeH); + conf->set_padding(paddingW); + conf->set_padding_y(paddingH); + conf->set_output_x(outSizeW); + conf->set_output_y(outSizeH); + config.set_output_size(outSizeH * outSizeW * channels); + return config; +} + +size_t SpatialPyramidPoolLayer::getSize() { + CHECK_EQ(inputLayers_.size(), 1UL); + size_t layerSize = 0; + const SppConfig& sppConf = config_.inputs(0).spp_conf(); + imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); + imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); + if (imgSizeH_ == 0) { + imgSizeH_ = sppConf.has_img_size_y() ? 
sppConf.img_size_y() : imgSizeW_; + } + if (imgSizeW_ == 0) { + imgSizeW_ = sppConf.img_size(); + } + + size_t outputH = 1; + size_t outputW = (std::pow(4, pyramidHeight_) - 1) / (4 - 1); + + layerSize = outputH * outputW * channels_; + return layerSize; +} + +bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + CHECK_EQ(config_.inputs_size(), 1); + + const SppConfig& sppConf = config_.inputs(0).spp_conf(); + pyramidHeight_ = sppConf.pyramid_height(); + poolType_ = sppConf.pool_type(); + + channels_ = sppConf.channels(); + imgSizeW_ = sppConf.img_size(); + imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_; + poolProjections_.reserve(pyramidHeight_); + projCol_.reserve(pyramidHeight_); + projOutput_.resize(pyramidHeight_); + + size_t startCol = 0; + size_t endCol = 0; + for (size_t i = 0; i < pyramidHeight_; i++) { + poolProjections_.emplace_back(PoolProjection::create( + getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_), nullptr, + useGpu_)); + endCol += poolProjections_[i]->getOutputSize(); + projCol_.push_back(std::make_pair(startCol, endCol)); + startCol = endCol; + } + CHECK_EQ(endCol, getSize()); + return true; +} + +void SpatialPyramidPoolLayer::forward(PassType passType) { + Layer::forward(passType); + + int batchSize = getInput(0).getBatchSize(); + resetOutput(batchSize, getSize()); + for (size_t i = 0; i < pyramidHeight_; i++) { + size_t startCol = projCol_[i].first; + size_t endCol = projCol_[i].second; + projOutput_[i].value = output_.value->subColMatrix(startCol, endCol); + projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol); + } + for (size_t i = 0; i < pyramidHeight_; i++) { + poolProjections_[i]->forward(&getInput(0), &projOutput_[i], passType); + } +} + +void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) { + for (size_t i = 0; i < pyramidHeight_; i++) { + if (poolProjections_[i]) { + poolProjections_[i]->backward(callback); + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.h b/paddle/gserver/layers/SpatialPyramidPoolLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..e15b6d2f85c6f5b9620e28aaef9c6246341611f9 --- /dev/null +++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" +#include "PoolProjection.h" +#include "paddle/math/MathUtils.h" +#include "paddle/utils/Logging.h" + +namespace paddle { +/** + * @brief A layer for spatial pyramid pooling on the input image by taking + * the max, average, etc. within regions, so that the result vector of + * different sized images are of the same size. + * + * The config file api is spp_layer. 
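The pyramid arithmetic in `getConfig()` / `getSize()` above works as follows: level `l` divides the image into `2^l x 2^l` bins regardless of image size, so level `l` contributes `4^l` outputs per channel and the whole pyramid contributes `(4^H - 1) / 3` per channel. An illustrative standalone sketch of that bookkeeping (struct and function names are invented for the example):

```cpp
// Level l splits the image into 2^l x 2^l bins, so it contributes 4^l bins.
struct SppLevel { int numBins, sizeH, sizeW, padH, padW; };

SppLevel sppLevel(int imgSizeH, int imgSizeW, int level) {
  int numBins = 1 << level;                         // 2^level bins per dimension
  int sizeH = (imgSizeH + numBins - 1) / numBins;   // ceil(imgSizeH / numBins)
  int sizeW = (imgSizeW + numBins - 1) / numBins;
  int padH = (sizeH * numBins - imgSizeH + 1) / 2;  // symmetric padding
  int padW = (sizeW * numBins - imgSizeW + 1) / 2;
  return {numBins, sizeH, sizeW, padH, padW};
}

int sppOutputSize(int channels, int pyramidHeight) {
  int bins = 0;
  for (int l = 0, perLevel = 1; l < pyramidHeight; ++l, perLevel *= 4) {
    bins += perLevel;                               // sum of 4^l = (4^H - 1) / 3
  }
  return bins * channels;
}
```

With `channels = 16` and `pyramid_height = 3` this gives 16 * 21 = 336 output columns, matching the `(std::pow(4, pyramidHeight_) - 1) / (4 - 1)` expression in `getSize()` and the size computed in the unit test below.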
+ */ + +class SpatialPyramidPoolLayer : public Layer { +protected: + size_t channels_; + size_t imgSizeW_; + size_t imgSizeH_; + size_t pyramidHeight_; + std::string poolType_; + + std::vector> poolProjections_; + std::vector projOutput_; + std::vector> projCol_; + +public: + explicit SpatialPyramidPoolLayer(const LayerConfig& config) : Layer(config) {} + + ~SpatialPyramidPoolLayer() {} + + virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + ProjectionConfig getConfig(size_t sizeX_, size_t sizeY_, size_t channels, + size_t pyamidLevel_, std::string& poolType_); + size_t getSize(); + + virtual void forward(PassType passType); + virtual void backward(const UpdateCallback& callback = nullptr); +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 9e9ee9eeceb928229f2e3be29c229f7a2ab14d8a..e7e07e9e69dc7a1b51211364dad7043bdcbaf4c3 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -13,15 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include #include -#include "paddle/gserver/layers/DataLayer.h" +#include #include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" #include "paddle/trainer/Trainer.h" #include "paddle/math/MathUtils.h" -#include "TestUtil.h" #include "LayerGradUtil.h" +#include "TestUtil.h" using namespace paddle; // NOLINT using namespace std; // NOLINT @@ -981,6 +981,32 @@ TEST(Layer, PoolLayer) { #endif } +void testSppLayer(const string& poolType, const int pyramidHeight, bool trans, + bool useGpu) { + TestConfig config; + config.layerConfig.set_type("spp"); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + SppConfig* sppConfig = input->mutable_spp_conf(); + sppConfig->set_pool_type(poolType); + sppConfig->set_pyramid_height(pyramidHeight); + sppConfig->set_channels(16); + sppConfig->set_img_size(10); + sppConfig->set_img_size_y(20); + int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); + config.layerConfig.set_size(outputSize * sppConfig->channels()); + testLayerGrad(config, "spp", 100, trans, useGpu); +} + +TEST(Layer, SpatialPyramidPoolLayer) { + for (auto useGpu : {false, true}) { + for (auto pyramidHeight : {1, 2, 3}) { + testSppLayer("avg-projection", pyramidHeight, false, useGpu); + testSppLayer("max-projection", pyramidHeight, false, useGpu); + } + } +} + TEST(Layer, rankCostLayer) { TestConfig config; config.layerConfig.set_type("rank-cost"); @@ -998,6 +1024,19 @@ TEST(Layer, rankCostLayer) { } } +TEST(Layer, sumCostLayer) { + TestConfig config; + config.layerConfig.set_type("sum_cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "sum_cost", 100, false, useGpu); + } +} + TEST(Layer, weightedRankCostLayer) { TestConfig config; config.layerConfig.set_type("rank-cost"); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 8a2a3791d7feda9affdec2a8155230fdf4aef99b..950c3bb6cca28ad4e9c10bc984898c9d643478c4 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -13,20 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. 
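The SPP layer's `forward()` above also explains why the pooling code now carries an explicit output stride: each pyramid level writes into a `subColMatrix` slice `[startCol, endCol)` of the shared output, so consecutive rows of a slice are a full output-width apart even though the slice itself is narrower. A small illustrative sketch of such a strided column view (not Paddle's `Matrix` API):

```cpp
#include <cstddef>
#include <vector>

// A column slice of a row-major matrix: its row stride is the width of the
// full matrix, which is why pooling needs a stride separate from the slice width.
struct ColumnSlice {
  float* data;    // first element of the slice
  size_t width;   // endCol - startCol
  size_t stride;  // row stride = width of the full output matrix
  float& at(size_t row, size_t col) { return data[row * stride + col]; }
};

ColumnSlice subColMatrixView(std::vector<float>& full, size_t totalWidth,
                             size_t startCol, size_t endCol) {
  return {full.data() + startCol, endCol - startCol, totalWidth};
}
```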
*/ #include "Matrix.h" +#include "MathFunctions.h" #include "SparseMatrix.h" #include "SparseRowMatrix.h" -#include "MathFunctions.h" -#include #include #include +#include -#include "paddle/utils/Logging.h" #include #include "hl_cnn.h" #include "hl_gpu.h" #include "hl_table_apply.h" #include "hl_top_k.h" +#include "paddle/utils/Logging.h" #include "paddle/utils/ThreadLocal.h" @@ -43,9 +43,9 @@ inline real _safelog(real a) { return a > 0.0f ? std::log(a) : -40.0f; } Matrix::Matrix(MemoryHandlePtr memHandle, size_t height, size_t width, bool trans, bool use_gpu) : BaseMatrix( - height, width, - memHandle ? (reinterpret_cast(memHandle->getBuf())) : nullptr, - trans, use_gpu) { + height, width, + memHandle ? (reinterpret_cast(memHandle->getBuf())) : nullptr, + trans, use_gpu) { elementCnt_ = width * height; memoryHandle_ = memHandle; } @@ -96,7 +96,7 @@ MatrixPtr Matrix::create(MemoryHandlePtr memHandle, size_t height, size_t width, if (auto gpuHandle = std::dynamic_pointer_cast(memHandle)) { return std::make_shared(gpuHandle, height, width, trans); } else if (auto cpuHandle = - std::dynamic_pointer_cast(memHandle)) { + std::dynamic_pointer_cast(memHandle)) { return std::make_shared(cpuHandle, height, width, trans); } else { LOG(FATAL) << "Wrong"; @@ -387,17 +387,17 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) { void GpuMatrix::collectBias(Matrix& a, real scale) { CHECK_EQ(getHeight(), (size_t)1); CHECK_EQ(width_, a.getWidth()); - GpuSparseMatrix* sMatPtr = dynamic_cast(&a); + GpuSparseMatrix* sMatPtr = dynamic_cast(&a); if (!sMatPtr) { sumCols(a, scale); } else { real* data = getData(); hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get(); - hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), - width_, scale); + hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale); } } + void GpuMatrix::collectSharedBias(Matrix& a, real scale) { CHECK_EQ(getHeight(), (size_t)1); CHECK_EQ(a.getWidth() % getWidth(), 0UL); @@ -453,8 +453,8 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, hl_trans_op_t transa = !a.isTransposed() ? HPPL_OP_N : HPPL_OP_T; hl_trans_op_t transb = !b.isTransposed() ? 
HPPL_OP_N : HPPL_OP_T; - hl_matrix_mul(A_d, transa, B_d, transb, C_d, dimM, dimN, dimK, - scaleAB, scaleT, lda, ldb, ldc); + hl_matrix_mul(A_d, transa, B_d, transb, C_d, dimM, dimN, dimK, scaleAB, + scaleT, lda, ldb, ldc); } void GpuMatrix::mul(const GpuSparseMatrix& a, const GpuMatrix& b, real scaleAB, @@ -475,8 +475,8 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, const GpuMatrix& b, real scaleAB, hl_sparse_matrix_s A_d = a.sMatrix_.get(); real* B_d = b.data_; real* C_d = data_; - hl_matrix_csr_mul_dense(A_d, transA, B_d, HPPL_OP_N, C_d, height_, - width_, b.height_, scaleAB, scaleT); + hl_matrix_csr_mul_dense(A_d, transA, B_d, HPPL_OP_N, C_d, height_, width_, + b.height_, scaleAB, scaleT); } void GpuMatrix::mul(const GpuMatrix& a, const GpuSparseMatrix& b, real scaleAB, @@ -497,11 +497,11 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuSparseMatrix& b, real scaleAB, << "Matrix dimensions are not equal"; } if (b.format_ == SPARSE_CSC) { - hl_matrix_dense_mul_csc(A_d, HPPL_OP_N, B_d, transB, C_d, height_, - width_, a.width_, scaleAB, scaleT); + hl_matrix_dense_mul_csc(A_d, HPPL_OP_N, B_d, transB, C_d, height_, width_, + a.width_, scaleAB, scaleT); } else { - hl_matrix_dense_mul_csr(A_d, HPPL_OP_N, B_d, transB, C_d, height_, - width_, a.width_, scaleAB, scaleT); + hl_matrix_dense_mul_csr(A_d, HPPL_OP_N, B_d, transB, C_d, height_, width_, + a.width_, scaleAB, scaleT); } } @@ -563,8 +563,8 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) { size_t tableSize = table.getHeight(); int* index = ids.getData(); - hl_matrix_select_rows(a, stride_, table.getData(), table.stride_, - index, numSamples, tableSize, dim); + hl_matrix_select_rows(a, stride_, table.getData(), table.stride_, index, + numSamples, tableSize, dim); #endif } @@ -581,8 +581,8 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) { size_t tableSize = table.getHeight(); int* index = ids.getData(); - hl_matrix_add_to_rows(table.getData(), table.stride_, a, stride_, - index, numSamples, tableSize, dim); + hl_matrix_add_to_rows(table.getData(), table.stride_, a, stride_, index, + numSamples, tableSize, dim); #endif } @@ -617,13 +617,8 @@ void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { CHECK_EQ(maxIds.getSize(), numSamples * beam); CHECK_EQ(maxVal.getHeight(), numSamples); - hl_matrix_top_k(maxVal.getData(), - maxVal.getStride(), - maxIds.getData(), - this->getData(), - this->getStride(), - this->getWidth(), - beam, + hl_matrix_top_k(maxVal.getData(), maxVal.getStride(), maxIds.getData(), + this->getData(), this->getStride(), this->getWidth(), beam, numSamples); #endif } @@ -647,12 +642,12 @@ void GpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels, size_t size = getWidth(); size_t batchSize = getHeight(); - const real* input = a.getData(); + const real* input = a.getData(); real* output = getData(); int* idForGpu = id.getData(); - hl_maxout_forward(input, output, idForGpu, batchSize, size, - size / channels, groups); + hl_maxout_forward(input, output, idForGpu, batchSize, size, size / channels, + groups); } void GpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels, @@ -663,12 +658,12 @@ void GpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t size = a.getWidth(); size_t batchSize = getHeight(); - real* input = getData(); + real* input = getData(); const real* output = a.getData(); const int* idForGpu = id.getData(); - hl_maxout_backward(input, output, idForGpu, batchSize, size, - size / channels, groups); + hl_maxout_backward(input, output, idForGpu, 
batchSize, size, size / channels, + groups); } /*calulate the error of classification */ @@ -684,8 +679,8 @@ void GpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) { real* recResult_d = data_; int* label_d = label_ptr->getData(); - hl_matrix_classification_error(output_d, label_d, recResult_d, - height_, output_ptr->width_); + hl_matrix_classification_error(output_d, label_d, recResult_d, height_, + output_ptr->width_); } /* copy -log(output[i * width + label]) to this->data[i] */ @@ -754,8 +749,7 @@ void GpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) { real* outputData = output.getData(); auto starts = index.getData(); int numSequences = index.getSize() - 1; - hl_sequence_softmax_forward(inputData, outputData, - starts, numSequences); + hl_sequence_softmax_forward(inputData, outputData, starts, numSequences); } void GpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { @@ -769,8 +763,7 @@ void GpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { real* output_d = output.data_; real* sftmaxSum_d = sftmaxSum.data_; real* grad_d = data_; - hl_matrix_softmax_derivative(grad_d, output_d, sftmaxSum_d, height_, - width_); + hl_matrix_softmax_derivative(grad_d, output_d, sftmaxSum_d, height_, width_); } void GpuMatrix::softmaxBackward(Matrix& outputV) { @@ -821,7 +814,7 @@ void GpuMatrix::scaledTanh(Matrix& output, real p1, real p2) { } void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) { CHECK(output1.useGpu_ == true && output2.useGpu_ == true) - << "Matrix type are not equal"; + << "Matrix type are not equal"; size_t numSamples = getHeight(); size_t dim = output1.getWidth(); CHECK_EQ(getWidth(), 1UL); @@ -830,15 +823,15 @@ void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) { real* out = getData(); real* x = output1.getData(); real* y = output2.getData(); - hl_cossim(out, x, y, - dim, output1.getHeight(), output2.getHeight(), scale); + hl_cossim(out, x, y, dim, output1.getHeight(), output2.getHeight(), scale); } void GpuMatrix::cosSimDerivative(Matrix& output, Matrix& prevOut1, Matrix& prevOut2, Matrix& prevGrad1, Matrix& prevGrad2, real scale) { CHECK(output.useGpu_ == true && prevOut1.useGpu_ == true && prevOut2.useGpu_ == true && prevGrad1.useGpu_ == true && - prevGrad2.useGpu_ == true) << "Matrix type are not equal"; + prevGrad2.useGpu_ == true) + << "Matrix type are not equal"; CHECK_EQ(getWidth(), 1UL); CHECK_EQ(output.getWidth(), 1UL); @@ -858,9 +851,8 @@ void GpuMatrix::cosSimDerivative(Matrix& output, Matrix& prevOut1, real* prevOutY = prevOut2.getData(); real* prevGradX = prevGrad1.getData(); real* prevGradY = prevGrad2.getData(); - hl_cossim_derivative(grad, out, prevOutX, prevOutY, - prevGradX, prevGradY, dim, - prevOut1.getHeight(), prevOut2.getHeight(), scale); + hl_cossim_derivative(grad, out, prevOutX, prevOutY, prevGradX, prevGradY, dim, + prevOut1.getHeight(), prevOut2.getHeight(), scale); } void GpuMatrix::randomizeUniform() { @@ -911,8 +903,8 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) { void GpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, - int strideW, int paddingH, int paddingW, - int outputH, int outputW) { + int strideW, int paddingH, int paddingW, int outputH, + int outputW) { CHECK(feature.useGpu_ == true) << "Matrix type are not equal"; CHECK_EQ(size_t(feaImgHeight * feaImgWidth * channels), @@ -922,17 +914,16 @@ void GpuMatrix::convExpand(Matrix& feature, int 
feaImgHeight, int feaImgWidth, size_t elemCnt = outputH * outputW * blockH * blockW * channels; CHECK_EQ(elemCnt, height_ * width_) << "Matrix dimensions are not equal"; - hl_expand_feature2col(feature.getData(), channels, feaImgHeight, - feaImgWidth, blockH, blockW, strideH, strideW, - paddingH, paddingW, outputH, outputW, - getData()); + hl_expand_feature2col(feature.getData(), channels, feaImgHeight, feaImgWidth, + blockH, blockW, strideH, strideW, paddingH, paddingW, + outputH, outputW, getData()); } void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight, int thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, - int paddingW, int outputH, int outputW, - real alpha, real beta) { + int paddingW, int outputH, int outputW, real alpha, + real beta) { CHECK(expandFeat.useGpu_ == true) << "Matrix type are not equal"; CHECK_EQ(size_t(thisImgHeight * thisImgWidth * channels), getHeight() * getWidth()) @@ -941,18 +932,17 @@ void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight, size_t elemCnt = outputH * outputW * blockW * blockH * channels; CHECK(elemCnt == expandFeat.getHeight() * expandFeat.getWidth()) << "Matrix dimensions are not equal"; - hl_shrink_col2feature( - expandFeat.getData(), channels, thisImgHeight, thisImgWidth, blockH, - blockW, strideH, strideW, paddingH, paddingW, outputH, outputW, - getData(), alpha, beta); + hl_shrink_col2feature(expandFeat.getData(), channels, thisImgHeight, + thisImgWidth, blockH, blockW, strideH, strideW, + paddingH, paddingW, outputH, outputW, getData(), alpha, + beta); } void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, - size_t imgSizeW, size_t channels, - size_t sizeX, size_t sizeY, - size_t strideH, size_t strideW, - size_t outputH, size_t outputW, - size_t paddingH, size_t paddingW) { + size_t imgSizeW, size_t channels, size_t sizeX, + size_t sizeY, size_t strideH, size_t strideW, + size_t outputH, size_t outputW, size_t paddingH, + size_t paddingW) { CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; real* inputData = inputMat.getData(); @@ -963,16 +953,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, CHECK(height_ == inputMat.getHeight()); CHECK(width_ == outputH * outputW * channels); - hl_maxpool_forward(frameNum, inputData, channels, height, width, - outputH, outputW, sizeX, sizeY, strideH, strideW, - paddingH, paddingW, data_); + hl_maxpool_forward(frameNum, inputData, channels, height, width, outputH, + outputW, sizeX, sizeY, strideH, strideW, paddingH, + paddingW, data_, getStride()); } void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, Matrix& outGrad, Matrix& outV, - size_t sizeX, size_t sizeY, - size_t strideH, size_t strideW, - size_t outputH, size_t outputW, + size_t sizeX, size_t sizeY, size_t strideH, + size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW) { CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true && @@ -992,19 +981,17 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH, CHECK(outGrad.getHeight() == outV.getHeight() && outGrad.getWidth() == outV.getWidth()); - - hl_maxpool_backward(frameNum, inputData, outData, outDiff, channels, - height, width, outputH, outputW, sizeX, sizeY, - strideH, strideW, paddingH, paddingW, - scaleTargets, scaleOutput, data_); + hl_maxpool_backward(frameNum, inputData, outData, outDiff, channels, height, + width, outputH, outputW, sizeX, sizeY, strideH, strideW, + 
paddingH, paddingW, scaleTargets, scaleOutput, data_, + outGrad.getStride()); } void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH, - size_t imgSizeW, size_t channels, - size_t sizeX, size_t sizeY, - size_t strideH, size_t strideW, - size_t outputH, size_t outputW, - size_t paddingH, size_t paddingW) { + size_t imgSizeW, size_t channels, size_t sizeX, + size_t sizeY, size_t strideH, size_t strideW, + size_t outputH, size_t outputW, size_t paddingH, + size_t paddingW) { CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; real* inputData = inputMat.getData(); @@ -1015,18 +1002,17 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH, CHECK(height_ == inputMat.getHeight()); CHECK(width_ == outputH * outputW * channels); - hl_avgpool_forward(frameNum, inputData, channels, height, width, - outputH, outputW, sizeX, sizeY, - strideH, strideW, - paddingH, paddingW, data_); + hl_avgpool_forward(frameNum, inputData, channels, height, width, outputH, + outputW, sizeX, sizeY, strideH, strideW, paddingH, + paddingW, data_, getStride()); } void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH, size_t imgSizeW, size_t sizeX, size_t sizeY, - size_t strideH, size_t strideW, - size_t outputH, size_t outputW, - real scaleTargets, real scaleOutput, - size_t paddingH, size_t paddingW) { + size_t strideH, size_t strideW, size_t outputH, + size_t outputW, real scaleTargets, + real scaleOutput, size_t paddingH, + size_t paddingW) { CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; real* outDiff = outGrad.getData(); @@ -1038,11 +1024,10 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH, CHECK(height_ == outGrad.getHeight()); CHECK(outGrad.getWidth() == outputH * outputW * channels); - hl_avgpool_backward(frameNum, outDiff, channels, height, width, - outputH, outputW, sizeX, sizeY, - strideH, strideW, paddingH, paddingW, - scaleTargets, scaleOutput, - data_); + hl_avgpool_backward(frameNum, outDiff, channels, height, width, outputH, + outputW, sizeX, sizeY, strideH, strideW, paddingH, + paddingW, scaleTargets, scaleOutput, data_, + outGrad.getStride()); } void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH, @@ -1057,8 +1042,8 @@ void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH, CHECK(denoms.getHeight() == input.getHeight() && denoms.getWidth() == input.getWidth() && input.getHeight() == height_ && input.getWidth() == width_); - hl_CMRNorm_forward(num, input.getData(), denoms.getData(), data_, - channels, height, width, sizeX, scale, -pow); + hl_CMRNorm_forward(num, input.getData(), denoms.getData(), data_, channels, + height, width, sizeX, scale, -pow); } void GpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms, @@ -1078,13 +1063,11 @@ void GpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms, denoms.getWidth() == localGrad.getWidth()); hl_CMRNorm_backward(num, preOutV.getData(), denoms.getData(), - localOutV.getData(), localGrad.getData(), data_, - channels, height, width, sizeX, -pow, - 2.0f * pow * scale); + localOutV.getData(), localGrad.getData(), data_, channels, + height, width, sizeX, -pow, 2.0f * pow * scale); } -void GpuMatrix::maxSequenceForward(Matrix& input, - const IVector& sequence, +void GpuMatrix::maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { CHECK(dynamic_cast(&input)); CHECK(dynamic_cast(&sequence)); @@ -1101,12 +1084,11 @@ void GpuMatrix::maxSequenceForward(Matrix& input, CHECK_EQ(numSequences, sequence.getSize() - 1); 
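On the matrix side, the GPU wrappers above now pass `getStride()` (or `outGrad.getStride()`) down to the CUDA calls, and the CPU pooling loops further below re-anchor their output pointer per sample when the matrix is a non-contiguous view. An illustrative sketch of that per-sample re-anchoring (standalone, not the Paddle code):

```cpp
#include <cstddef>

// Sketch: when the output matrix is a non-contiguous view (row stride larger
// than the row width), sample n's outputs start at data + n * stride rather
// than data + n * rowWidth, mirroring `outData = data_ + n * outStride`.
void writePooledFrame(float* data, size_t n, size_t rowWidth, size_t stride,
                      const float* pooled /* rowWidth values for sample n */) {
  float* outRow = data + n * stride;
  for (size_t j = 0; j < rowWidth; ++j) {
    outRow[j] = pooled[j];
  }
}
```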
CHECK_EQ(numSequences * dim, index.getSize()); - hl_max_sequence_forward(inputData, starts, outData, maxIndex, - numSequences, dim); + hl_max_sequence_forward(inputData, starts, outData, maxIndex, numSequences, + dim); } -void GpuMatrix::maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, +void GpuMatrix::maxSequenceBackward(Matrix& outputGrad, const IVector& sequence, IVector& index) { CHECK(dynamic_cast(&outputGrad)); CHECK(dynamic_cast(&sequence)); @@ -1163,9 +1145,8 @@ void GpuMatrix::contextProjectionBackwardData(MatrixPtr inputGrad, real* inGrad = inputGrad->getData(); const int* starts = sequence.getData(); - hl_context_projection_backward_data(outGrad, starts, inGrad, - numSequences, inputDim, - contextLength, contextStart); + hl_context_projection_backward_data(outGrad, starts, inGrad, numSequences, + inputDim, contextLength, contextStart); } void GpuMatrix::contextProjectionBackwardWeight(MatrixPtr weightGrad, @@ -1185,9 +1166,9 @@ void GpuMatrix::contextProjectionBackwardWeight(MatrixPtr weightGrad, real* wtGrad = weightGrad->getData(); const int* starts = sequence.getData(); - hl_context_projection_backward_weight( - outGrad, starts, wtGrad, numSequences, weightDim, totalPad, contextLength, - contextStart, beginPad); + hl_context_projection_backward_weight(outGrad, starts, wtGrad, numSequences, + weightDim, totalPad, contextLength, + contextStart, beginPad); } void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) { @@ -1199,8 +1180,7 @@ void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) { size_t numSamples = data.getHeight(); size_t partial_sum = numElements / (W.getHeight() * W.getWidth()); real* output = getData(); - hl_param_relu_forward(output, input, w, numElements, numSamples, - partial_sum); + hl_param_relu_forward(output, input, w, numElements, numSamples, partial_sum); } void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) { @@ -1212,8 +1192,8 @@ void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) { size_t numElements = data.getWidth(); size_t numSamples = data.getHeight(); size_t partial_sum = numElements / (this->getHeight() * this->getWidth()); - hl_param_relu_backward_w(wgrad, ograd, input, - numElements, numSamples, partial_sum); + hl_param_relu_backward_w(wgrad, ograd, input, numElements, numSamples, + partial_sum); } void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { @@ -1224,8 +1204,8 @@ void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { size_t numElements = data.getWidth(); size_t numSamples = data.getHeight(); size_t partial_sum = numElements / (W.getHeight() * W.getWidth()); - hl_param_relu_backward_diff(ograd, input, w, diff, - numElements, numSamples, partial_sum); + hl_param_relu_backward_diff(ograd, input, w, diff, numElements, numSamples, + partial_sum); } void GpuMatrix::addColumnVector(const Matrix& b) { @@ -1571,8 +1551,8 @@ void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) { void CpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth, int channels, int blockH, int blockW, int strideH, - int strideW, int paddingH, int paddingW, - int outputH, int outputW) { + int strideW, int paddingH, int paddingW, int outputH, + int outputW) { CHECK(feature.useGpu_ == false) << "Matrix type are not equal"; CHECK_EQ(size_t(feaImgHeight * feaImgWidth * channels), @@ -1612,8 +1592,8 @@ void CpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth, void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight, int 
thisImgWidth, int channels, int blockH, int blockW, int strideH, int strideW, int paddingH, - int paddingW, int outputH, int outputW, - real alpha, real beta) { + int paddingW, int outputH, int outputW, real alpha, + real beta) { CHECK(expandFeat.useGpu_ == false) << "Matrix type are not equal"; CHECK_EQ(size_t(thisImgHeight * thisImgWidth * channels), getHeight() * getWidth()) @@ -1650,11 +1630,10 @@ void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight, } void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, - size_t imgSizeW, size_t channels, - size_t sizeX, size_t sizeY, - size_t strideH, size_t strideW, - size_t outputH, size_t outputW, - size_t paddingH, size_t paddingW) { + size_t imgSizeW, size_t channels, size_t sizeX, + size_t sizeY, size_t strideH, size_t strideW, + size_t outputH, size_t outputW, size_t paddingH, + size_t paddingW) { real* inputData = inputMat.getData(); real* outData = data_; size_t num = inputMat.getHeight(); @@ -1662,15 +1641,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t inHeight = imgSizeH; CHECK(inHeight * inWidth == inputMat.getWidth() / channels); CHECK_EQ(num, this->getHeight()); - CHECK_EQ(channels*outputH*outputW, this->getWidth()); + CHECK_EQ(channels * outputH * outputW, this->getWidth()); + size_t outStride = getStride(); /* initialize the data_ */ - for (size_t i = 0; i < height_ * width_; i++) { - outData[i] = -(real)FLT_MAX; + for (size_t i = 0; i < height_; i++) { + for (size_t j = 0; j < width_; j++) { + outData[i * outStride + j] = -(real)FLT_MAX; + } } /* pool max one by one */ - for (size_t n = 0; n < num; ++n) { // frame by frame + for (size_t n = 0; n < num; ++n) { // frame by frame + if (!isContiguous()) { + outData = data_ + n * outStride; + } for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { @@ -1712,7 +1697,16 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, real* inData = image.getData(); real* otData = outV.getData(); real* otGrad = outGrad.getData(); + + size_t outStride = outV.getStride(); + real* origOutData = otData; + real* origOutGrad = otGrad; + for (size_t n = 0; n < num; ++n) { + if (!outV.isContiguous()) { + otData = origOutData + n * outStride; + otGrad = origOutGrad + n * outStride; + } for (size_t c = 0; c < channels; ++c) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { @@ -1743,9 +1737,9 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, size_t sizeY, - size_t strideH, size_t strideW, - size_t outputH, size_t outputW, - size_t paddingH, size_t paddingW) { + size_t strideH, size_t strideW, size_t outputH, + size_t outputW, size_t paddingH, + size_t paddingW) { // The main loop size_t num = input.getHeight(); size_t inHeight = imgSizeH; @@ -1756,6 +1750,9 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, real* inData = input.getData(); for (size_t n = 0; n < num; ++n) { + if (!isContiguous()) { + tgtData = data_ + n * getStride(); + } for (size_t c = 0; c < channels; ++c) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { @@ -1787,9 +1784,8 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, } void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, 
size_t imgSizeW, - size_t sizeX, size_t sizeY, - size_t strideH, size_t strideW, - size_t outputH, size_t outputW, + size_t sizeX, size_t sizeY, size_t strideH, + size_t strideW, size_t outputH, size_t outputW, real scaleTargets, real scaleOutput, size_t paddingH, size_t paddingW) { size_t num = input.getHeight(); @@ -1799,6 +1795,9 @@ void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, real* outData = getData(); for (size_t n = 0; n < num; ++n) { + if (!input.isContiguous()) { + inData = input.getData() + n * input.getStride(); + } for (size_t c = 0; c < channels; ++c) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { @@ -1901,8 +1900,7 @@ void CpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms, * Output: output size is the number of input sequences (NOT input instances). * output[i] is set to max_{for each instance in this sequence}{input[i]} */ -void CpuMatrix::maxSequenceForward(Matrix& input, - const IVector& sequence, +void CpuMatrix::maxSequenceForward(Matrix& input, const IVector& sequence, IVector& index) { CHECK(dynamic_cast(&input)); CHECK(dynamic_cast(&sequence)); @@ -1943,8 +1941,7 @@ void CpuMatrix::maxSequenceForward(Matrix& input, } } -void CpuMatrix::maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, +void CpuMatrix::maxSequenceBackward(Matrix& outputGrad, const IVector& sequence, IVector& index) { CHECK(dynamic_cast(&outputGrad)); CHECK(dynamic_cast(&sequence)); @@ -2776,7 +2773,7 @@ void SharedCpuMatrix::mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, blockSeq.push_back(k); } std::shuffle(blockSeq.begin(), blockSeq.end(), - ThreadLocalRandomEngine::get()); + ThreadLocalRandomEngine::get()); } std::vector& localBufRows = *localBufRows_; int* cols = a->getCols(); @@ -3007,7 +3004,7 @@ void CpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels, size_t size = getWidth(); size_t batchSize = getHeight(); size_t featLen = size / channels; - const real* input = a.getData(); + const real* input = a.getData(); int* idForCpu = id.getData(); MatrixPtr maxInMat, maxOutMat; @@ -3041,8 +3038,8 @@ void CpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t batchSize = getHeight(); size_t featLen = size / channels; size_t newFeatLen = groups * featLen; - real* inputG = getData(); - const real* outG = a.getData(); + real* inputG = getData(); + const real* outG = a.getData(); int* idForCpu = id.getData(); for (size_t batch_idx = 0; batch_idx < batchSize; ++batch_idx) { @@ -3266,9 +3263,9 @@ void CpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) { CHECK(isContiguous()); MatrixPtr inTmp = Matrix::create(nullptr, /* height= */ 1, 1, - /* trans= */ false, false); + /* trans= */ false, false); MatrixPtr outTmp = Matrix::create(nullptr, /* height= */ 1, 1, - /* trans= */ false, false); + /* trans= */ false, false); size_t numSequences = index.getSize() - 1; auto starts = index.getData(); for (size_t i = 0; i < numSequences; ++i) { diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 2cdff9d1aca927122fcdb0c2a7ab22a0e38b41c1..b16d4314654ffeab74137ec1ee69203dab56d851 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -378,7 +378,7 @@ hl_activation_mode_t hlActiveType(const std::string& type) { return HL_ACTIVATION_RELU; } else if (type == "tanh") { return HL_ACTIVATION_TANH; - } else if (type == "linear") { + } else if (type == "linear" || type == "") { return HL_ACTIVATION_LINEAR; } else { LOG(FATAL) << "Do not support activation 
type " << type; diff --git a/proto/ModelConfig.proto.m4 b/proto/ModelConfig.proto.m4 index 479b457e55a7dac58ff390cce8d67d46da3b474d..aea77248cbac0f3ee044b05894d37718e692a0fc 100644 --- a/proto/ModelConfig.proto.m4 +++ b/proto/ModelConfig.proto.m4 @@ -120,6 +120,14 @@ message PoolConfig { optional uint32 padding_y = 13 [default = 0]; } +message SppConfig { + required string pool_type = 1; + required uint32 pyramid_height = 2; + required uint32 channels = 3; + required uint32 img_size = 4; + optional uint32 img_size_y = 5; +} + message NormConfig { // rnorm or cmrnorm required string norm_type = 1; @@ -196,6 +204,9 @@ message ProjectionConfig { // For IdentityOffsetProjection optional uint64 offset = 11 [default = 0]; + + // For pool + optional PoolConfig pool_conf = 12; } message OperatorConfig { @@ -245,6 +256,7 @@ message LayerInputConfig { optional string input_layer_argument = 9; optional BilinearInterpConfig bilinear_interp_conf = 10; optional MaxOutConfig maxout_conf = 11; + optional SppConfig spp_conf = 12; } message LayerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index c55579c960ecc3f3640d2cb6e77641f0f14a3328..eec978e1faf48805c70f25bdd55e4183fddb2fcc 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -218,7 +218,7 @@ def Inputs(*args): @config_func def HasInputsSet(): - return len(g_config.model_config.input_layer_names) != 0 + return len(g_current_submodel.input_layer_names) != 0 # Define the name of the output layers of the NeuralNetwork. @@ -471,6 +471,7 @@ class Input(Cfg): image=None, block_expand=None, maxout=None, + spp=None, format=None, nnz=None, is_static=None, @@ -671,7 +672,6 @@ class ConvProjection(Projection): def calc_parameter_dims(self, input_size, output_size): return None - # Define a operator for mixed layer @config_class class Operator(Cfg): @@ -795,6 +795,17 @@ class Pool(Cfg): padding = None, padding_y = None): self.add_keys(locals()) + +# please refer to the comments in proto/ModelConfig.proto +@config_class +class SpatialPyramidPool(Cfg): + def __init__( + self, + pool_type, + pyramid_height, + channels, + img_width = None): + self.add_keys(locals()) # please refer to the comments in proto/ModelConfig.proto @config_class @@ -1081,6 +1092,22 @@ def parse_pool(pool, input_layer_name, pool_conf): pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, pool_conf.padding_y, pool_conf.stride_y, False) +def parse_spp(spp, input_layer_name, spp_conf): + spp_conf.pool_type = spp.pool_type + config_assert(spp.pool_type in ['max-projection', 'avg-projection'], + "pool-type %s is not in " "['max-projection', 'avg-projection']" + % spp.pool_type) + spp_conf.pyramid_height = spp.pyramid_height + spp_conf.channels = spp.channels + + img_pixels = g_layer_map[input_layer_name].size / spp_conf.channels + + spp_conf.img_size = default(spp.img_width, int(img_pixels ** 0.5)) + spp_conf.img_size_y = img_pixels / spp_conf.img_size + config_assert(spp_conf.img_size * spp_conf.img_size_y == img_pixels, + "Incorrect input image size %d for input image pixels %d" + % (spp_conf.img_size, img_pixels)) + def parse_image(image, input_layer_name, image_conf): image_conf.channels = image.channels image_pixels = g_layer_map[input_layer_name].size / image_conf.channels @@ -1170,14 +1197,14 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.output_x = 0 else: block_expand_conf.output_x = cnn_output_size( - 
block_expand.img_size_x, block_expand.block_x, + block_expand.img_size_x, block_expand.block_x, block_expand.padding_x, block_expand.stride_x, False) if block_expand_conf.img_size_y == 0: block_expand_conf.output_y = 0 else: block_expand_conf.output_y = cnn_output_size( - block_expand.img_size_y, block_expand.block_y, + block_expand.img_size_y, block_expand.block_y, block_expand.padding_y, block_expand.stride_y, False) def parse_maxout(maxout, input_layer_name, maxout_conf): @@ -1185,7 +1212,7 @@ def parse_maxout(maxout, input_layer_name, maxout_conf): maxout_conf.groups = maxout.groups maxout_conf.img_size_x = maxout.img_size_x maxout_conf.img_size_y = maxout.img_size_y - + # Define an evaluator @config_func def Evaluator( @@ -1756,6 +1783,25 @@ class PoolLayer(LayerBase): name, pool_conf.output_y, pool_conf.output_x)) self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels) +@config_layer('spp') +class SpatialPyramidPoolLayer(LayerBase): + def __init__( + self, + name, + inputs, + device=None): + super(SpatialPyramidPoolLayer, self).__init__(name, 'spp', 0, inputs=inputs, device=device) + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + parse_spp( + self.inputs[input_index].spp, + input_layer.name, + self.config.inputs[input_index].spp_conf) + spp_conf = self.config.inputs[input_index].spp_conf + output_size = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1) + print("output size for %s is %d " % (name, output_size)) + self.set_layer_size(output_size * spp_conf.channels) + @config_layer('batch_norm') class BatchNormLayer(LayerBase): layer_type = 'batch_norm' @@ -1881,7 +1927,7 @@ class MaxOutLayer(LayerBase): self.config.inputs[0].maxout_conf) maxout_conf = self.config.inputs[0].maxout_conf self.set_layer_size(g_layer_map[input_layer.name].size / maxout_conf.groups) - + # key: cost type # value: cost class g_cost_map = {} @@ -1903,6 +1949,7 @@ define_cost('SumOfSquaresCostLayer', 'square_error') define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy') define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy') define_cost('HuberTwoClass', 'huber') +define_cost('SumCost', 'sum_cost') @config_layer('hsigmoid') class HierarchicalSigmoidLayer(LayerBase): @@ -3015,7 +3062,7 @@ def Layer( layer_func = layers.get(type) config_assert(layer_func, "layer type '%s' not supported." % type) - layer_func(name, **xargs) + return layer_func(name, **xargs) @config_func def ParameterHook( diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py index 451b9ac3396eadf9fab2b5fd940a6f924e042976..adebebba2523f851507c4a0525eeaae9cfeb9dcc 100644 --- a/python/paddle/trainer_config_helpers/__init__.py +++ b/python/paddle/trainer_config_helpers/__init__.py @@ -20,3 +20,6 @@ from layers import * from networks import * from optimizers import * from attrs import * + +# This will enable operator overload for LayerOutput +import math diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py index ad5cdc0a0eb13f7a58e7d89ebfb79d33a63b75d5..2202d0bf96976d5ca694f1417af5da9c31eaa9f0 100644 --- a/python/paddle/trainer_config_helpers/activations.py +++ b/python/paddle/trainer_config_helpers/activations.py @@ -23,9 +23,9 @@ __all__ = ["TanhActivation", "SigmoidActivation", class BaseActivation(object): """ - A mark for activation class. + A mark for activation class. 
Each activation inherit BaseActivation, which has two parameters. - + :param name: activation name in paddle config. :type name: basestring :param support_hppl: True if supported by hppl. HPPL is a library used by paddle @@ -194,7 +194,7 @@ class SquareActivation(BaseActivation): class ExpActivation(BaseActivation): """ Exponential Activation. - + .. math:: f(z) = e^z. """ diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 59822180883c9a41990e67625c6c75952e7c34b7..82c57e7f90ad53aa91ea4b4e4afe8b8308bbedc8 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -31,6 +31,7 @@ import copy __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", "identity_projection", "dotmul_projection", "dotmul_operator", + "repeat_layer", "table_projection", "mixed_layer", "data_layer", "embedding_layer", "fc_layer", "grumemory", "pooling_layer", "lstmemory", "last_seq", "first_seq", @@ -52,10 +53,11 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", 'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer', 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', - 'multi_binary_label_cross_entropy', + 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', 'huber_cost', 'block_expand_layer', - 'maxout_layer', 'out_prod_layer', 'print_layer' + 'maxout_layer', 'out_prod_layer', 'print_layer', + 'spp_layer', ] @@ -99,6 +101,7 @@ class LayerType(object): SCALING_LAYER = 'scaling' TRANS_LAYER = 'trans' OUT_PROD_LAYER = 'out_prod' + FEATURE_MAP_EXPAND_LAYER = 'featmap_expand' MEMORY = 'memory' MAXID_LAYER = 'maxid' @@ -113,6 +116,7 @@ class LayerType(object): LINEAR_COMBINATION_LAYER = "convex_comb" BLOCK_EXPAND = "blockexpand" MAXOUT = "maxout" + SPP_LAYER = "spp" PRINT_LAYER = "print" @@ -128,6 +132,7 @@ class LayerType(object): CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm" SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy" MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy" + SUM_COST = "sum_cost" @staticmethod def is_layer_type(type_name): @@ -181,6 +186,7 @@ class LayerOutput(object): reverse=None): assert isinstance(name, basestring) assert isinstance(layer_type, basestring) + assert size is not None assert LayerType.is_layer_type(layer_type) self.name = name self.layer_type = layer_type @@ -873,6 +879,7 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None, size=input.size) + @wrap_bias_attr_default() @wrap_param_attr_default() @wrap_act_default(param_names=['gate_act'], @@ -1209,6 +1216,48 @@ def expand_layer(input, expand_as, parents=[input, expand_as]) +@wrap_name_default() +@layer_support() +def repeat_layer(input, num_repeats, + name=None, + layer_attr=None): + """ + A layer for repeating the input for num_repeats times. This is equivalent + to apply concat_layer() with num_repeats same input. + + .. math:: + y = [x, x, \cdots, x] + + The example usage is: + + .. code-block:: python + + expand = repeat_layer(layer, 4) + + :param input: Input layer + :type input: LayerOutput + :param num_repeats: Repeat the input so many times + :type num_repeats: int + :param name: Layer name. + :type name: basestring + :param layer_attr: extra layer attributes. + :type layer_attr: ExtraLayerAttribute. + :return: LayerOutput object. 
+ :rtype: LayerOutput + """ + + l = Layer( + inputs=[input.name], + name=name, + num_filters=num_repeats, + type=LayerType.FEATURE_MAP_EXPAND_LAYER, + **ExtraAttr.to_kwargs(layer_attr) + ) + return LayerOutput(name=name, + size=l.config.size, + layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER, + parents=[input]) + @wrap_name_default() @layer_support() def interpolation_layer(input, weight, name=None, layer_attr=None): @@ -1296,7 +1345,7 @@ def bilinear_interp_layer(input, assert out_size_x > 0 and out_size_y > 0 assert input.num_filters is not None num_channels = input.num_filters - Layer(name=name, + l = Layer(name=name, inputs=Input(input.name, bilinear_interp=BilinearInterp(out_size_x=out_size_x, out_size_y=out_size_y, @@ -1304,7 +1353,7 @@ def bilinear_interp_layer(input, type=LayerType.BILINEAR_INTERP_LAYER, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name, LayerType.BILINEAR_INTERP_LAYER, parents=[input], - num_filters=num_channels) + num_filters=num_channels, size=l.config.size) @wrap_name_default() @layer_support() @@ -1482,7 +1531,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): inputs=[a.name, b.name], **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b]) + return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size) @wrap_name_default() @@ -1545,7 +1594,7 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None, ipts_for_layer.append(label.name) parents.append(label) - Layer( + l = Layer( name=name, type=LayerType.HSIGMOID, num_classes=num_classes, @@ -1553,7 +1602,8 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None, inputs=ipts_for_layer, **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.HSIGMOID, parents=parents) + return LayerOutput(name, LayerType.HSIGMOID, parents=parents, + size=l.config.size) @wrap_name_default("conv") @@ -1671,7 +1721,7 @@ def img_conv_layer(input, filter_size, num_filters, lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER - Layer( + l = Layer( name=name, inputs=Input(input.name, conv=Conv( filter_size=filter_size, padding=padding, stride=stride, @@ -1687,7 +1737,8 @@ def img_conv_layer(input, filter_size, num_filters, **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, lt, parents=[input], - activation=act, num_filters=num_filters) + activation=act, num_filters=num_filters, + size=l.config.size) @wrap_name_default("pool") @@ -1718,7 +1769,7 @@ def img_pool_layer(input, pool_size, name=None, :type pool_size_y: int|None :param num_channels: number of input channel. :type num_channels: int - :param pool_type: pooling type. MaxPooling or AveragePooling. Default is + :param pool_type: pooling type. MaxPooling or AvgPooling. Default is MaxPooling. :type pool_type: BasePoolingType :param stride: stride width of pooling. 
@@ -1750,7 +1801,7 @@ def img_pool_layer(input, pool_size, name=None, stride_y = stride if stride_y is None else stride_y padding_y = padding if padding_y is None else padding_y - Layer( + l = Layer( name=name, type=LayerType.POOL_LAYER, inputs=[Input(input.name, @@ -1769,6 +1820,62 @@ def img_pool_layer(input, pool_size, name=None, **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.POOL_LAYER, parents=[input], + num_filters=num_channels, size=l.config.size) + + +@wrap_name_default("spp") +@layer_support() +def spp_layer(input, name=None, num_channels=None, pool_type=None, + pyramid_height=None, img_width=None, layer_attr=None): + """ + Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. + For details, please refer to + `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_. + + :param name: layer name. + :type name: basestring + :param input: layer's input. + :type input: LayerOutput + :param num_channels: number of input channel. + :type num_channels: int + :param pool_type: Pooling type. MaxPooling or AvgPooling. Default is MaxPooling. + :type pool_type: BasePoolingType + :param pyramid_height: pyramid height. + :type pyramid_height: int + :param img_width: the width of input feature map. If it is None, the input feature + map should be square. + :type img_width: int|None + :param layer_attr: Extra Layer Attribute. + :type layer_attr: ExtraLayerAttribute + :return: LayerOutput object. + :rtype: LayerOutput + """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + + if pool_type is None: + pool_type = MaxPooling() + elif isinstance(pool_type, AvgPooling): + pool_type.name = 'avg' + + type_name = pool_type.name + if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)): + type_name += '-projection' + + Layer( + name=name, + type=LayerType.SPP_LAYER, + inputs=Input(input.name, + spp=SpatialPyramidPool(pool_type=type_name, + channels=num_channels, + pyramid_height=pyramid_height, + img_width=img_width) + ), + **ExtraLayerAttribute.to_kwargs(layer_attr) + ) + return LayerOutput(name, LayerType.SPP_LAYER, parents=[input], num_filters=num_channels) @@ -1778,7 +1885,7 @@ def __img_norm_layer__(name, input, size, norm_type, scale, power, assert input.num_filters is not None num_channels = input.num_filters - Layer( + l = Layer( name=name, type=LayerType.NORM_LAYER, inputs=Input( input.name, norm=Norm(norm_type=norm_type, channels=num_channels, size=size, @@ -1788,7 +1895,8 @@ def __img_norm_layer__(name, input, size, norm_type, scale, power, **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, layer_type=LayerType.NORM_LAYER, parents=[input], - num_filters=num_channels, img_norm_type=norm_type) + num_filters=num_channels, img_norm_type=norm_type, + size=l.config.size) @wrap_name_default("crmnorm") @@ -1913,7 +2021,7 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None, num_channels = input.size assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \ (batch_norm_type == "cudnn_batch_norm") - Layer( + l = Layer( name=name, inputs=Input(input.name, image=Image(channels=num_channels), @@ -1929,7 +2037,8 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None, return LayerOutput(name=name, layer_type=LayerType.BATCH_NORM_LAYER, parents=[input], activation=act, - num_filters=num_channels, + size=l.config.size) @wrap_name_default() @@ -2034,7 +2143,7 @@ def addto_layer(input, act=None, name=None,
bias_attr=None, if each_input.num_filters is not None: num_filters = each_input.num_filters - Layer( + l = Layer( name=name, type=LayerType.ADDTO_LAYER, inputs=ipts_for_layer, bias=ParamAttr.to_bias(bias_attr), active_type=act.name, @@ -2042,7 +2151,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None, ) return LayerOutput(name, LayerType.ADDTO_LAYER, parents=input, - activation=act, num_filters=num_filters) + activation=act, num_filters=num_filters, + size=l.config.size) @wrap_act_default(act=IdentityActivation()) @@ -2651,13 +2761,14 @@ def maxid_layer(input, name=None, layer_attr=None): """ assert isinstance(input, LayerOutput) - Layer(name=name, + l = Layer(name=name, type='maxid', inputs=[input.name], **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name=name, layer_type=LayerType.MAXID_LAYER, - parents=[input]) + parents=[input], + size=l.config.size) @wrap_name_default() @@ -2686,13 +2797,14 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None): assert isinstance(input1, LayerOutput) assert isinstance(input2, LayerOutput) - Layer(name=name, + l = Layer(name=name, type=LayerType.OUT_PROD_LAYER, inputs=[input1.name, input2.name], **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name=name, layer_type=LayerType.OUT_PROD_LAYER, - parents=[input1, input2]) + parents=[input1, input2], + size=l.config.size) @wrap_name_default() @@ -2721,13 +2833,14 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): :return: LayerOutput object. :rtype: LayerOutput """ - Layer(name=name, + l = Layer(name=name, type=LayerType.EOSID_LAYER, eos_id=eos_id, inputs=[input.name], **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name=name, layer_type=LayerType.EOSID_LAYER, - parents=[input]) + parents=[input], + size=l.config.size) @wrap_name_default() @@ -2892,7 +3005,7 @@ def regression_cost(input, label, weight=None, name=None, Layer(inputs=ipts, type="square_error", name=name, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.COST, parents=parents) + return LayerOutput(name, LayerType.COST, parents=parents, size=1) @wrap_name_default("cost") @@ -2944,7 +3057,7 @@ def classification_cost(input, label, weight=None, name=None, for each_evaluator in evaluator: __add_evaluator__(each_evaluator) - return LayerOutput(name, LayerType.COST, parents=parents) + return LayerOutput(name, LayerType.COST, parents=parents, size=1) def conv_operator(img, filter, filter_size, num_filters, @@ -3326,13 +3439,14 @@ def sampling_id_layer(input, name=None, layer_attr=None): :return: LayerOutput object. 
:rtype: LayerOutput """ - Layer( + l = Layer( name=name, type=LayerType.SAMPLING_ID_LAYER, inputs=[Input(input.name)], **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.SAMPLING_ID_LAYER, input) + return LayerOutput(name, LayerType.SAMPLING_ID_LAYER, input, + size=l.config.size) @wrap_name_default() @@ -3373,7 +3487,8 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0, inputs=[Input(input.name)], **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.SLOPE_INTERCEPT_LAYER, input) + return LayerOutput(name, LayerType.SLOPE_INTERCEPT_LAYER, input, + size=input.size) @wrap_name_default() @@ -3512,7 +3627,7 @@ def block_expand_layer(input, if num_channels is None: assert input.num_filters is not None num_channels = input.num_filters - Layer(name=name, + l = Layer(name=name, inputs=Input(input.name, block_expand=BlockExpand(channels=num_channels, block_x=block_x, @@ -3525,7 +3640,8 @@ def block_expand_layer(input, **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input]) + return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input], + size=l.config.size) @wrap_name_default() @@ -3586,13 +3702,14 @@ def maxout_layer(input, assert input.num_filters is not None num_channels = input.num_filters assert num_channels % groups == 0 - Layer(name=name, + l = Layer(name=name, inputs=Input(input.name, maxout=MaxOut(channels=num_channels, groups=groups)), type=LayerType.MAXOUT, **ExtraLayerAttribute.to_kwargs(layer_attr)) - return LayerOutput(name, LayerType.MAXOUT, parents=[input]) + return LayerOutput(name, LayerType.MAXOUT, parents=[input], + size=l.config.size) @wrap_name_default() @@ -3718,7 +3835,10 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None, parents = [input, label] if weight is not None: parents.append(weight) - return LayerOutput(name, LayerType.CRF_LAYER, parents, size=size) + # The size for LayerOutput means the dimension of the output. + # It's different from the meaning of crf layer, which is the number of + # classes. + return LayerOutput(name, LayerType.CRF_LAYER, parents, size=1) @wrap_name_default() @@ -3766,7 +3886,10 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None, parents = [input] if label is not None: parents.append(label) - return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=size) + # The size for LayerOutput means the dimension of the output. + # It's different from the meaning of crf layer, which is the number of + # classes. + return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1) @wrap_bias_attr_default(has_bias=True) @wrap_name_default() @@ -3834,7 +3957,7 @@ def nce_layer(input, label, num_classes, weight=None, ipts_for_layer.append(weight.name) parents.append(weight) - Layer( + l = Layer( name=name, type=LayerType.NCE_LAYER, num_classes=num_classes, @@ -3844,7 +3967,8 @@ def nce_layer(input, label, num_classes, weight=None, bias=ParamAttr.to_bias(bias_attr), **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.NCE_LAYER, parents=parents) + return LayerOutput(name, LayerType.NCE_LAYER, parents=parents, + size=l.config.size) """ following are cost Layers. 
@@ -3919,7 +4043,7 @@ def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr= **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.RANK_COST, parents=parents) + return LayerOutput(name, LayerType.RANK_COST, parents=parents, size=1) @wrap_name_default() @@ -3971,7 +4095,8 @@ def lambda_cost(input, score, name, NDCG_num=5, max_sort_size=-1, layer_attr=Non **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.LAMBDA_COST, parents=[input, score]) + return LayerOutput(name, LayerType.LAMBDA_COST, parents=[input, score], + size=1) @wrap_name_default() @@ -3982,14 +4107,13 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): .. code-block:: python - cost = cross_entropy(input, label) + cost = cross_entropy(input=input_layer, + label=label_layer) :param input: The first input layer. :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param type: The type of cost. - :type type: basestring. :param name: The name of this layers. It is not necessary. :type name: None|basestring. :param coeff: The coefficient affects the gradient in the backward. @@ -4006,7 +4130,8 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label]) + return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label], + size=1) @wrap_name_default() @@ -4019,14 +4144,13 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0, .. code-block:: python - cost = cross_entropy_with_selfnorm(input, label) + cost = cross_entropy_with_selfnorm(input=input_layer, + label=label_layer) :param input: The first input layer. :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param type: The type of cost. - :type type: basestring. :param name: The name of this layers. It is not necessary. :type name: None|basestring. :param coeff: The coefficient affects the gradient in the backward. @@ -4048,7 +4172,39 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0, return LayerOutput(name, LayerType.CROSS_ENTROPY_WITH_SELFNORM, - parents=[input, label]) + parents=[input, label], size=1) + + +@wrap_name_default() +@layer_support() +def sum_cost(input, name=None, layer_attr=None): + """ + A loss layer which calculates the sum of the input as its loss. + + .. code-block:: python + + cost = sum_cost(input=input_layer) + + :param input: The input layer. + :type input: LayerOutput. + :param name: The name of this layer. It is not necessary. + :type name: None|basestring. + :param layer_attr: Extra Layer Attribute. + :type layer_attr: ExtraLayerAttribute + :return: LayerOutput object. + :rtype: LayerOutput. + """ + assert isinstance(input, LayerOutput) + Layer(name=name, + type=LayerType.SUM_COST, + inputs=[input.name], + **ExtraLayerAttribute.to_kwargs(layer_attr) + ) + + return LayerOutput(name, + LayerType.SUM_COST, + parents=[input], + size=1) @wrap_name_default() @@ -4059,7 +4215,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): .. code-block:: python - cost = huber_cost(input, label) + cost = huber_cost(input=input_layer, + label=label_layer) :param input: The first input layer. :type input: LayerOutput.
@@ -4083,7 +4240,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.HUBER, parents=[input, label]) + return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1) @wrap_name_default() @@ -4095,7 +4252,8 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0, .. code-block:: python - cost = multi_binary_label_cross_entropy(input, label) + cost = multi_binary_label_cross_entropy(input=input_layer, + label=label_layer) :param input: The first input layer. :type input: LayerOutput @@ -4126,4 +4284,4 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0, **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY, - parents=[input, label]) + parents=[input, label], size=1) diff --git a/python/paddle/trainer_config_helpers/math.py b/python/paddle/trainer_config_helpers/math.py index e35849b77ac531b4a4676019e01285af67925bd9..7d7bb2914859fbda222b41a7bbe568b283b32487 100644 --- a/python/paddle/trainer_config_helpers/math.py +++ b/python/paddle/trainer_config_helpers/math.py @@ -13,10 +13,11 @@ # limitations under the License. from .layers import LayerOutput, mixed_layer, identity_projection, \ - slope_intercept_layer + slope_intercept_layer, scaling_layer, repeat_layer from .attrs import is_compatible_with from .default_decorators import * import activations as act +from paddle.trainer.config_parser import logger __all__ = [] @@ -40,7 +41,21 @@ register_unary_math_op('square', act.SquareActivation()) def add(layeroutput, other): if is_compatible_with(other, float): return slope_intercept_layer(input=layeroutput, intercept=other) - assert isinstance(other, LayerOutput) + if not isinstance(other, LayerOutput): + logger.fatal("LayerOutput can only be added with" + " another LayerOutput or a number") + if layeroutput.size == other.size: + return mixed_layer(input=[identity_projection(input=layeroutput), + identity_projection(input=other)]) + if other.size != 1 and layeroutput.size != 1: + logger.fatal("Two LayerOutput can be added only if they have equal size" + " or one of their sizes is 1. 
sizes are %s and %s" % + (layeroutput.size, other.size)) + elif layeroutput.size == 1: + tmp = layeroutput + layeroutput = other + other = tmp + other = repeat_layer(other, layeroutput.size) return mixed_layer(input=[identity_projection(input=layeroutput), identity_projection(input=other)]) @@ -50,10 +65,11 @@ LayerOutput.__add__ = add def sub(layeroutput, other): if is_compatible_with(other, float): return slope_intercept_layer(input=layeroutput, intercept=other) - assert isinstance(other, LayerOutput) + if not isinstance(other, LayerOutput): + logger.fatal("LayerOutput can only be subtracted with" + " another Layeroutput or a number") neg = slope_intercept_layer(input=other, slope=-1.0) - return mixed_layer(input=[identity_projection(input=layeroutput), - identity_projection(input=neg)]) + return add(layeroutput, neg) LayerOutput.__sub__ = sub @@ -62,3 +78,20 @@ def rsub(layeroutput, other): return add(neg, other) LayerOutput.__rsub__ = rsub + +def mul(layeroutput, other): + if is_compatible_with(other, float): + return slope_intercept_layer(input=layeroutput, slope=other) + if not isinstance(other, LayerOutput): + logger.fatal("LayerOutput can only be multiplied with" + " another Layeroutput or a number") + elif layeroutput.size == 1: + return scaling_layer(input=other, weight=layeroutput) + elif other.size == 1: + return scaling_layer(input=layeroutput, weight=other) + else: + logger.fatal("At least one of the operand of '*' must be a number" + " or a LayerOutput with size=1") + +LayerOutput.__mul__ = mul +LayerOutput.__rmul__ = mul diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh index cafc2142f25c74d54ec8a1ab937db23306da2904..e84e2a4b7f36a42999b2decff2b277cbbd56cda0 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh @@ -11,8 +11,8 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer last_first_seq test_expand_layer test_ntm_layers test_hsigmoid img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight -test_bilinear_interp test_maxout test_bi_grumemory math_ops -test_spilit_datasource) +test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops +test_split_datasource) for conf in ${configs[*]} diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py index fe515b7029336d093df5428ab8ac1c65a2d4e98a..7c2770c616dc11b1c69450dff9085c9943a2ff8f 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py +++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py @@ -19,6 +19,12 @@ y = x + y y = y - x y = y - 2 y = 2 - y - +y = 2 * y +y = y * 3 +z= data_layer(name='data_2', size=1) +y = y * z +y = z * y +y = y + z +y = z + y outputs(y) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr index 1767445c44bf5c0ea7c1149ad9fef2dd92508c54..da8da1b541f37a09654202f68232b99e4dac9f61 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr @@ -209,8 +209,129 @@ layers { slope: 1.0 intercept: 2 } +layers { + name: 
"__slope_intercept_layer_6__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__slope_intercept_layer_5__" + } + slope: 2 + intercept: 0.0 +} +layers { + name: "__slope_intercept_layer_7__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "__slope_intercept_layer_6__" + } + slope: 3 + intercept: 0.0 +} +layers { + name: "data_2" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__scaling_layer_0__" + type: "scaling" + size: 100 + active_type: "" + inputs { + input_layer_name: "data_2" + } + inputs { + input_layer_name: "__slope_intercept_layer_7__" + } +} +layers { + name: "__scaling_layer_1__" + type: "scaling" + size: 100 + active_type: "" + inputs { + input_layer_name: "data_2" + } + inputs { + input_layer_name: "__scaling_layer_0__" + } +} +layers { + name: "__repeat_layer_0__" + type: "featmap_expand" + size: 100 + active_type: "" + inputs { + input_layer_name: "data_2" + } + num_filters: 100 +} +layers { + name: "__mixed_2__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__scaling_layer_1__" + proj_conf { + type: "identity" + name: "___mixed_2__.w0" + input_size: 100 + output_size: 100 + } + } + inputs { + input_layer_name: "__repeat_layer_0__" + proj_conf { + type: "identity" + name: "___mixed_2__.w1" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__repeat_layer_1__" + type: "featmap_expand" + size: 100 + active_type: "" + inputs { + input_layer_name: "data_2" + } + num_filters: 100 +} +layers { + name: "__mixed_3__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_2__" + proj_conf { + type: "identity" + name: "___mixed_3__.w0" + input_size: 100 + output_size: 100 + } + } + inputs { + input_layer_name: "__repeat_layer_1__" + proj_conf { + type: "identity" + name: "___mixed_3__.w1" + input_size: 100 + output_size: 100 + } + } +} +input_layer_names: "data_2" input_layer_names: "data" -output_layer_names: "__slope_intercept_layer_5__" +output_layer_names: "__mixed_3__" sub_models { name: "root" layer_names: "data" @@ -228,8 +349,18 @@ sub_models { layer_names: "__slope_intercept_layer_3__" layer_names: "__slope_intercept_layer_4__" layer_names: "__slope_intercept_layer_5__" + layer_names: "__slope_intercept_layer_6__" + layer_names: "__slope_intercept_layer_7__" + layer_names: "data_2" + layer_names: "__scaling_layer_0__" + layer_names: "__scaling_layer_1__" + layer_names: "__repeat_layer_0__" + layer_names: "__mixed_2__" + layer_names: "__repeat_layer_1__" + layer_names: "__mixed_3__" + input_layer_names: "data_2" input_layer_names: "data" - output_layer_names: "__slope_intercept_layer_5__" + output_layer_names: "__mixed_3__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index 5261cf0c44943689a957bb99c21075bb7341cd49..f6045fe1f68255daf0d9b5ab05034eec633e4503 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -23,6 +23,17 @@ layers { size: 10 active_type: "" } +layers { + name: "__fc_layer_0__" + type: "fc" + size: 4 + active_type: "tanh" + inputs { + input_layer_name: "input" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} layers { name: 
"__ctc_layer_0__" type: "ctc" @@ -36,17 +47,6 @@ layers { } norm_by_times: false } -layers { - name: "__fc_layer_0__" - type: "fc" - size: 4 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} layers { name: "crf_label" type: "data" @@ -191,6 +191,16 @@ layers { } coeff: 1.0 } +layers { + name: "__sum_cost_0__" + type: "sum_cost" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + coeff: 1.0 +} parameters { name: "___fc_layer_0__.w0" size: 800 @@ -241,14 +251,15 @@ output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__huber_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" +output_layer_names: "__sum_cost_0__" sub_models { name: "root" layer_names: "input" layer_names: "labels" layer_names: "probs" layer_names: "xe-label" - layer_names: "__ctc_layer_0__" layer_names: "__fc_layer_0__" + layer_names: "__ctc_layer_0__" layer_names: "crf_label" layer_names: "__crf_layer_0__" layer_names: "left" @@ -264,6 +275,7 @@ sub_models { layer_names: "huber_label" layer_names: "__huber_cost_0__" layer_names: "__multi_binary_label_cross_entropy_0__" + layer_names: "__sum_cost_0__" input_layer_names: "input" input_layer_names: "labels" input_layer_names: "crf_label" @@ -284,6 +296,7 @@ sub_models { output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__huber_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" + output_layer_names: "__sum_cost_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..8b0a8f2146b709ee67981049da8061597e1716be --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_spp_layer.protostr @@ -0,0 +1,34 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 3200 + active_type: "" +} +layers { + name: "__spp_0__" + type: "spp" + size: 80 + active_type: "" + inputs { + input_layer_name: "data" + spp_conf { + pool_type: "max-projection" + pyramid_height: 2 + channels: 16 + img_size: 10 + img_size_y: 20 + } + } +} +input_layer_names: "data" +output_layer_names: "__spp_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__spp_0__" + input_layer_names: "data" + output_layer_names: "__spp_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py index 64b45f4ded10b09ec4a7e77499e2d7b21215f430..cfaf2da001106289db09a87f6d9935e8f07ceca0 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py @@ -11,8 +11,9 @@ labels = data_layer(name='labels', size=5000) probs = data_layer(name='probs', size=10) xe_label = data_layer(name='xe-label', size=10) +hidden = fc_layer(input=seq_in, size=4) outputs(ctc_layer(input=seq_in, label=labels), - crf_layer(input=fc_layer(input=seq_in, size=4), + crf_layer(input=hidden, label=data_layer(name='crf_label', size=4)), rank_cost(left=data_layer(name='left', size=1), right=data_layer(name='right', size=1), @@ -23,4 +24,5 @@ outputs(ctc_layer(input=seq_in, label=labels), 
cross_entropy_with_selfnorm(input=probs, label=xe_label), huber_cost(input=data_layer(name='huber_probs', size=1), label=data_layer(name='huber_label', size=1)), - multi_binary_label_cross_entropy(input=probs, label=xe_label)) + multi_binary_label_cross_entropy(input=probs, label=xe_label), + sum_cost(input=hidden)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..178387d3cf1d16bd391aad8e08950554e83fbeff --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py @@ -0,0 +1,16 @@ +from paddle.trainer_config_helpers import * + +settings( + batch_size=100, + learning_rate=1e-5 +) + +data = data_layer(name='data', size=3200) + +spp = spp_layer(input=data, + pyramid_height=2, + num_channels=16, + pool_type=MaxPooling(), + img_width=10) + +outputs(spp)
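
The sketch below (illustrative only, not part of the patch; the layer names and sizes are arbitrary assumptions) shows how the pieces introduced here fit together in a trainer config: the new spp_layer, the LayerOutput arithmetic overloads from math.py, and the new sum_cost.

# Illustrative trainer config sketch (not part of the patch).
# Names and sizes are arbitrary; only APIs used or added in this change appear.
from paddle.trainer_config_helpers import *

settings(batch_size=100, learning_rate=1e-5)

# 16 channels of 10x20 feature maps, as in test_spp_layer.py above.
img = data_layer(name='img', size=3200)

# pyramid_height=2 gives (4^2 - 1) / (4 - 1) = 5 bins per channel,
# i.e. 5 * 16 = 80 outputs, matching the size in test_spp_layer.protostr.
spp = spp_layer(input=img, num_channels=16, pyramid_height=2,
                pool_type=MaxPooling(), img_width=10)

hidden = fc_layer(input=spp, size=64)

# LayerOutput overloads from math.py: scale by a constant, then add a
# size-1 layer, which is broadcast through repeat_layer inside add().
bias = data_layer(name='bias', size=1)
combined = 0.5 * hidden + bias

# sum_cost reduces the result to a scalar training objective.
outputs(sum_cost(input=combined))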