提交 cdac60f6 编写于 作者: Q qijun

add SpatialPyramidPoolLayer c++ support

上级 46bd5f53
...@@ -91,6 +91,7 @@ extern void hl_expand_feature2col( ...@@ -91,6 +91,7 @@ extern void hl_expand_feature2col(
* @param[in] paddingH padding height. * @param[in] paddingH padding height.
* @param[in] paddingW padding width. * @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* @param[in] tgtStride output data stride.
* *
*/ */
extern void hl_maxpool_forward( extern void hl_maxpool_forward(
...@@ -100,7 +101,8 @@ extern void hl_maxpool_forward( ...@@ -100,7 +101,8 @@ extern void hl_maxpool_forward(
const int pooledH, const int pooledW, const int pooledH, const int pooledW,
const int sizeX, const int sizeY, const int sizeX, const int sizeY,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData); const int paddingH, const int paddingW,
real* tgtData, const int tgtStride);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
...@@ -123,6 +125,7 @@ extern void hl_maxpool_forward( ...@@ -123,6 +125,7 @@ extern void hl_maxpool_forward(
* @param[in] paddingH padding height. * @param[in] paddingH padding height.
* @param[in] paddingW padding width. * @param[in] paddingW padding width.
* @param[out] targetGrad output grad. * @param[out] targetGrad output grad.
* @param[in] outStride output grad data stride.
* *
*/ */
extern void hl_maxpool_backward( extern void hl_maxpool_backward(
...@@ -135,7 +138,7 @@ extern void hl_maxpool_backward( ...@@ -135,7 +138,7 @@ extern void hl_maxpool_backward(
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, const int paddingH, const int paddingW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* targetGrad); real* targetGrad, const int outStride);
/** /**
* @brief Average pool forward. * @brief Average pool forward.
...@@ -154,6 +157,7 @@ extern void hl_maxpool_backward( ...@@ -154,6 +157,7 @@ extern void hl_maxpool_backward(
* @param[in] paddingH padding height. * @param[in] paddingH padding height.
* @param[in] paddingW padding width. * @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* @param[in] tgtStride output data stride.
* *
*/ */
extern void hl_avgpool_forward( extern void hl_avgpool_forward(
...@@ -163,7 +167,8 @@ extern void hl_avgpool_forward( ...@@ -163,7 +167,8 @@ extern void hl_avgpool_forward(
const int pooledH, const int pooledW, const int pooledH, const int pooledW,
const int sizeX, const int sizeY, const int sizeX, const int sizeY,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData); const int paddingH, const int paddingW,
real* tgtData, const int tgtStride);
/** /**
* @brief Average pool backward. * @brief Average pool backward.
...@@ -184,6 +189,7 @@ extern void hl_avgpool_forward( ...@@ -184,6 +189,7 @@ extern void hl_avgpool_forward(
* @param[in] scaleA scale. * @param[in] scaleA scale.
* @param[in] scaleB scale. * @param[in] scaleB scale.
* @param[out] backGrad output grad. * @param[out] backGrad output grad.
* @param[in] outStride output grad data stride.
* *
*/ */
extern void hl_avgpool_backward( extern void hl_avgpool_backward(
...@@ -195,7 +201,7 @@ extern void hl_avgpool_backward( ...@@ -195,7 +201,7 @@ extern void hl_avgpool_backward(
const int strideH, const int strideW, const int strideH, const int strideW,
int paddingH, int paddingW, int paddingH, int paddingW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* backGrad); real* backGrad, const int outStride);
/** /**
* @brief Cross-map-response normalize forward. * @brief Cross-map-response normalize forward.
......
...@@ -44,7 +44,8 @@ inline void hl_maxpool_forward( ...@@ -44,7 +44,8 @@ inline void hl_maxpool_forward(
const int pooledH, const int pooledW, const int pooledH, const int pooledW,
const int sizeX, const int sizeY, const int sizeX, const int sizeY,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {} const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {}
inline void hl_maxpool_backward( inline void hl_maxpool_backward(
const int frameCnt, const real* inputData, const int frameCnt, const real* inputData,
...@@ -56,7 +57,7 @@ inline void hl_maxpool_backward( ...@@ -56,7 +57,7 @@ inline void hl_maxpool_backward(
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, const int paddingH, const int paddingW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* targetGrad) {} real* targetGrad, const int outStride) {}
inline void hl_avgpool_forward( inline void hl_avgpool_forward(
const int frameCnt, const real* inputData, const int frameCnt, const real* inputData,
...@@ -65,7 +66,8 @@ inline void hl_avgpool_forward( ...@@ -65,7 +66,8 @@ inline void hl_avgpool_forward(
const int pooledH, const int pooledW, const int pooledH, const int pooledW,
const int sizeX, const int sizeY, const int sizeX, const int sizeY,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {} const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {}
inline void hl_avgpool_backward( inline void hl_avgpool_backward(
const int frameCnt, const real* outGrad, const int frameCnt, const real* outGrad,
...@@ -76,7 +78,7 @@ inline void hl_avgpool_backward( ...@@ -76,7 +78,7 @@ inline void hl_avgpool_backward(
const int strideH, const int strideW, const int strideH, const int strideW,
int paddingH, int paddingW, int paddingH, int paddingW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* backGrad) {} real* backGrad, const int outStride) {}
inline void hl_CMRNorm_forward( inline void hl_CMRNorm_forward(
size_t frameCnt, const real* in, real* scale, real* out, size_t frameCnt, const real* in, real* scale, real* out,
......
...@@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData, ...@@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
const int ksizeW, const int ksizeH, const int ksizeW, const int ksizeH,
const int strideH, const int strideW, const int strideH, const int strideW,
const int offsetH, const int offsetW, const int offsetH, const int offsetW,
real* tgtData) { real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x; int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) { if (index < nthreads) {
int pw = index % pooledW; int pw = index % pooledW;
...@@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData, ...@@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
maxval = inputData[h * width + w]; maxval = inputData[h * width + w];
} }
} }
tgtData[index] = maxval; int tgtIndex = index % (pooledW * pooledH * channels) +
frameNum * tgtStride;
tgtData[tgtIndex] = maxval;
} }
} }
...@@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData, ...@@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
const int sizeX, const int sizeY, const int sizeX, const int sizeY,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, const int paddingH, const int paddingW,
real* tgtData) { real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt; int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024; int blocks = (num_kernels + 1024 - 1) / 1024;
...@@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData, ...@@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>> KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels, height, width, (num_kernels, inputData, channels, height, width,
pooledH, pooledW, sizeX, sizeY, strideH, strideW, pooledH, pooledW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData); paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_maxpool_forward failed"); CHECK_SYNC("hl_maxpool_forward failed");
} }
...@@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData, ...@@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
const int strideH, const int strideW, const int strideH, const int strideW,
const int padH, const int padW, const int padH, const int padW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* targetGrad) { real* targetGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x; int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) { if (index < nthreads) {
// find out the local index // find out the local index
...@@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData, ...@@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0; int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0; real gradient = 0;
real input = inputData[index]; real input = inputData[index];
outData += (frameNum * channels + offsetC) * pooledH * pooledW; outData += (frameNum * outStride + offsetC * pooledH * pooledW);
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW; outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
if (input == outData[ph * pooledW + pw]) { if (input == outData[ph * pooledW + pw]) {
...@@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData, ...@@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, const int paddingH, const int paddingW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* targetGrad) { real* targetGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt; int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024; int blocks = (num_kernels + 1024 - 1) / 1024;
...@@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData, ...@@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
strideH, strideW, strideH, strideW,
paddingH, paddingW, paddingH, paddingW,
scaleA, scaleB, scaleA, scaleB,
targetGrad); targetGrad, outStride);
CHECK_SYNC("hl_maxpool_backward"); CHECK_SYNC("hl_maxpool_backward");
} }
...@@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData, ...@@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
const int sizeX, const int sizeY, const int sizeX, const int sizeY,
const int strideH, const int strideW, const int strideH, const int strideW,
const int padH, const int padW, const int padH, const int padW,
real* tgtData) { real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x; int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) { if (index < nthreads) {
int pw = index % pooledW; int pw = index % pooledW;
...@@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData, ...@@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
aveval += inputData[h * width + w]; aveval += inputData[h * width + w];
} }
} }
tgtData[index] = aveval / pool_size; int tgtIndex = index % (pooledW * pooledH * channels) +
frameNum * tgtStride;
tgtData[tgtIndex] = aveval / pool_size;
} }
} }
...@@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData, ...@@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData,
const int pooledH, const int pooledW, const int pooledH, const int pooledW,
const int sizeX, const int sizeY, const int sizeX, const int sizeY,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) { const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt; int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024; int blocks = (num_kernels + 1024 - 1) / 1024;
KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>> KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels, (num_kernels, inputData, channels,
height, width, pooledH, pooledW, height, width, pooledH, pooledW,
sizeX, sizeY, strideH, strideW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData); paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_avgpool_forward failed"); CHECK_SYNC("hl_avgpool_forward failed");
} }
...@@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad, ...@@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
const int strideH, const int strideW, const int strideH, const int strideW,
const int padH, const int padW, const int padH, const int padW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* tgtGrad) { real* tgtGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x; int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) { if (index < nthreads) {
int offsetW = index % width + padW; int offsetW = index % width + padW;
...@@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad, ...@@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0; int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0;
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0; int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0; real gradient = 0;
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW; outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
...@@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad, ...@@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
const int strideH, const int strideW, const int strideH, const int strideW,
const int paddingH, const int paddingW, const int paddingH, const int paddingW,
real scaleA, real scaleB, real scaleA, real scaleB,
real* backGrad) { real* backGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt; int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024; int blocks = (num_kernels + 1024 - 1) / 1024;
...@@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad, ...@@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
strideH, strideW, strideH, strideW,
paddingH, paddingW, paddingH, paddingW,
scaleA, scaleB, scaleA, scaleB,
backGrad); backGrad, outStride);
CHECK_SYNC("hl_avgpool_backward failed"); CHECK_SYNC("hl_avgpool_backward failed");
} }
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PoolProjection.h"
namespace paddle {
REGISTER_PROJECTION_CREATE_FUNC(pool2, &PoolProjection::create);
/**
 * Factory that builds the pooling projection named in the configuration.
 *
 * The pool type is read from config.pool_conf().pool_type():
 *   - "max" produces a MaxPoolProjection,
 *   - "avg" produces an AvgPoolProjection.
 * Any other value is reported through LOG(FATAL); the final nullptr is only
 * reached if that log statement hands control back.
 *
 * @param config    projection configuration carrying the pool_conf section
 * @param parameter forwarded unchanged to the projection constructor
 * @param useGpu    forwarded unchanged to the projection constructor
 * @return a projection allocated with new (the caller takes ownership), or
 *         nullptr for an unknown pool type
 */
PoolProjection* PoolProjection::create(const ProjectionConfig& config,
                                       ParameterPtr parameter, bool useGpu) {
  const std::string& poolType = config.pool_conf().pool_type();

  if (poolType == "max") {
    return new MaxPoolProjection(config, parameter, useGpu);
  }
  if (poolType == "avg") {
    return new AvgPoolProjection(config, parameter, useGpu);
  }

  LOG(FATAL) << "Unknown pool type: " << poolType;
  return nullptr;
}
void MaxPoolProjection::forward() {
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputY_, outputX_, confPaddingY_, confPadding_);
}
void MaxPoolProjection::backward(const UpdateCallback& callback) {
(void)callback;
MatrixPtr outGrad = out_->grad;
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
MatrixPtr inputGrad = in_->grad;
if (NULL == inputGrad) {
return;
}
inputGrad->maxPoolBackward(*inputV, imgSizeY_, imgSize_, *outGrad, *outV,
sizeX_, sizeY_,
strideY_, stride_, outputY_, outputX_, 1, 1,
confPaddingY_, confPadding_);
}
void AvgPoolProjection::forward() {
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputY_, outputX_, confPaddingY_, confPadding_);
}
void AvgPoolProjection::backward(const UpdateCallback& callback) {
(void)callback;
MatrixPtr outputGrad = out_->grad;
MatrixPtr inputGrad = in_->grad;
if (NULL == inputGrad) {
return;
}
inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_,
sizeX_, sizeY_, strideY_, stride_,
outputY_, outputX_, 1, 1,
confPaddingY_, confPadding_);
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Projection.h"
namespace paddle {
class PoolProjection : public Projection {
protected:
size_t imgSizeY_, imgSize_;
size_t outputY_, outputX_;
size_t strideY_, stride_;
size_t sizeY_, sizeX_;
int confPaddingY_, confPadding_;
size_t channels_;
std::string poolType_;
public:
PoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
bool useGpu)
: Projection(config, parameter, useGpu) {
const PoolConfig& conf = config_.pool_conf();
poolType_ = conf.pool_type();
channels_ = conf.channels();
sizeX_ = conf.size_x();
stride_ = conf.stride();
outputX_ = conf.output_x();
imgSize_ = conf.img_size();
confPadding_ = conf.padding();
sizeY_ = conf.has_size_y() ? conf.size_y() : conf.size_x();
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride();
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
}
static PoolProjection* create(const ProjectionConfig& config,
ParameterPtr parameter, bool useGpu);
const std::string& getPoolType() const { return poolType_; }
};
/**
 * Pooling projection whose forward/backward passes use max pooling.
 * All configuration handling is inherited from PoolProjection.
 */
class MaxPoolProjection : public PoolProjection {
public:
  MaxPoolProjection(const ProjectionConfig& config,
                    ParameterPtr parameter,
                    bool useGpu)
      : PoolProjection(config, parameter, useGpu) {}

  /** Max-pools the input value into the output value. */
  virtual void forward();

  /** Propagates the output gradient back to the input gradient. */
  virtual void backward(const UpdateCallback& callback = nullptr);
};
/**
 * Pooling projection whose forward/backward passes use average pooling.
 * All configuration handling is inherited from PoolProjection.
 */
class AvgPoolProjection : public PoolProjection {
public:
  AvgPoolProjection(const ProjectionConfig& config,
                    ParameterPtr parameter,
                    bool useGpu)
      : PoolProjection(config, parameter, useGpu) {}

  /** Average-pools the input value into the output value. */
  virtual void forward();

  /** Propagates the output gradient back to the input gradient. */
  virtual void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
...@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/parameter/Parameter.h"
#include "ModelConfig.pb.h"
#include "Layer.h" #include "Layer.h"
#include "ModelConfig.pb.h"
#include "paddle/parameter/Parameter.h"
namespace paddle { namespace paddle {
...@@ -28,6 +27,11 @@ namespace paddle { ...@@ -28,6 +27,11 @@ namespace paddle {
Projection::registrar_.registerClass<__class_name>(#__type_name); \ Projection::registrar_.registerClass<__class_name>(#__type_name); \
}) })
/**
 * Registers a projection type through an explicit creation function.
 *
 * Expands to a file-local static InitFunction object named
 * __reg_type_<__type_name>, built from a lambda that calls
 * Projection::registrar_.registerClass() with the stringized type name and
 * the given createFunction.
 * NOTE(review): when the lambda actually runs depends on InitFunction,
 * which is not visible here -- confirm before relying on ordering.
 *
 * @param __type_name    bare identifier; used both as a name suffix and,
 *                       stringized, as the registry key
 * @param createFunction callable handed to registerClass()
 */
#define REGISTER_PROJECTION_CREATE_FUNC(__type_name, createFunction) \
static InitFunction __reg_type_##__type_name([]() { \
Projection::registrar_.registerClass(#__type_name, createFunction); \
})
/** /**
* A projection takes one Argument as input, calculate the result and add it * A projection takes one Argument as input, calculate the result and add it
* to output Argument. * to output Argument.
...@@ -50,7 +54,8 @@ public: ...@@ -50,7 +54,8 @@ public:
registrar_; registrar_;
/** /**
* Forward propagation. If backward() will be called, in and out must be kept valid until then. * Forward propagation. If backward() will be called, in and out must be kept
* valid until then.
* @param in input of projection * @param in input of projection
* @param out output of projection * @param out output of projection
* @param passType PASS_TRAIN or PASS_TEST * @param passType PASS_TRAIN or PASS_TEST
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "SpatialPyramidPoolLayer.h"
namespace paddle {
REGISTER_LAYER(spp, SpatialPyramidPoolLayer);
/**
 * Builds the "pool2" projection configuration for one pyramid level.
 *
 * A level splits each image dimension into 2^pyramidLevel bins. For each
 * dimension the window (and the equal stride) is ceil(imgSize / bins); the
 * image is padded by half of the overshoot (rounded up), and the resulting
 * output extent comes from outputSize().
 *
 * @param imgSizeW     input image width
 * @param imgSizeH     input image height
 * @param channels     number of input channels
 * @param pyramidLevel zero-based pyramid level
 * @param poolType     pool type string stored in the PoolConfig
 * @return projection config whose output_size is outH * outW * channels
 */
ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
                                                    size_t imgSizeH,
                                                    size_t channels,
                                                    size_t pyramidLevel,
                                                    std::string& poolType) {
  int bins = std::pow(2, pyramidLevel);

  // Vertical geometry.
  int windowH = std::ceil(imgSizeH / static_cast<double>(bins));
  int overshootH = windowH * bins - imgSizeH;
  int padH = (overshootH + 1) / 2;
  int pooledH = outputSize(imgSizeH, windowH, padH, windowH);

  // Horizontal geometry.
  int windowW = std::ceil(imgSizeW / static_cast<double>(bins));
  int overshootW = windowW * bins - imgSizeW;
  int padW = (overshootW + 1) / 2;
  int pooledW = outputSize(imgSizeW, windowW, padW, windowW);

  ProjectionConfig projConf;
  projConf.set_type("pool2");

  PoolConfig* poolConf = projConf.mutable_pool_conf();
  poolConf->set_channels(channels);
  poolConf->set_img_size(imgSizeW);
  poolConf->set_img_size_y(imgSizeH);
  poolConf->set_pool_type(poolType);

  // Window and stride are identical, so the bins do not overlap.
  poolConf->set_size_x(windowW);
  poolConf->set_size_y(windowH);
  poolConf->set_stride(windowW);
  poolConf->set_stride_y(windowH);
  poolConf->set_padding(padW);
  poolConf->set_padding_y(padH);
  poolConf->set_output_x(pooledW);
  poolConf->set_output_y(pooledH);

  projConf.set_output_size(pooledH * pooledW * channels);
  return projConf;
}
/**
 * Prepares the per-level input Argument handed to a pooling projection.
 *
 * The value matrix is shared with the layer's first input. A separate,
 * zero-filled gradient matrix of the requested shape is (re)created only
 * when the current pass is not PASS_TEST and this layer needs a gradient.
 *
 * @param input  argument to fill in
 * @param height row count of the gradient matrix
 * @param width  column count of the gradient matrix
 * @param useGpu whether the gradient matrix is created for the GPU
 */
void SpatialPyramidPoolLayer::splitInput(Argument& input, size_t height,
                                         size_t width, bool useGpu) {
  input.value = getInput(0).value;

  const bool wantsGrad = passType_ != PASS_TEST && needGradient();
  if (!wantsGrad) {
    return;
  }

  Matrix::resizeOrCreate(input.grad, height, width, /* trans */ false,
                         useGpu);
  input.grad->zeroMem();
}
bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
CHECK_EQ(config_.inputs_size(), 1);
const SppConfig& sppConf = config_.inputs(0).spp_conf();
pyramidHeight_ = sppConf.pyramid_height();
poolType_ = sppConf.pool_type();
channels_ = sppConf.channels();
imgSizeW_ = sppConf.img_size();
imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
poolProjections_.reserve(pyramidHeight_);
projCol_.reserve(pyramidHeight_);
projInput_.reserve(pyramidHeight_);
projOutput_.resize(pyramidHeight_);
size_t startCol = 0;
size_t endCol = 0;
for (size_t i = 0; i < pyramidHeight_; i++) {
poolProjections_.emplace_back(PoolProjection::create(
getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_),
nullptr, useGpu_));
endCol += poolProjections_[i]->getOutputSize();
projCol_.push_back(std::make_pair(startCol, endCol));
startCol = endCol;
projInput_.emplace_back(Argument());
}
outputSize_ = endCol;
return true;
}
/**
 * Forward pass: runs every pyramid level's pooling projection and writes
 * its result into that level's column range of the layer output.
 *
 * For each level the projection output Argument is pointed at a column
 * sub-matrix of output_.value (and of output_.grad when one exists), and
 * the projection input is prepared by splitInput().
 *
 * @param passType pass type forwarded to Layer::forward and the projections
 */
void SpatialPyramidPoolLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  resetOutput(batchSize, outputSize_);

  for (size_t i = 0; i < pyramidHeight_; i++) {
    size_t startCol = projCol_[i].first;
    size_t endCol = projCol_[i].second;

    projOutput_[i].value = output_.value->subColMatrix(startCol, endCol);

    // The output gradient is not guaranteed to be allocated here (for
    // instance when no backward pass will follow), so only slice it when
    // it exists and otherwise drop any stale view from an earlier pass.
    if (output_.grad) {
      projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol);
    } else {
      projOutput_[i].grad = nullptr;
    }

    splitInput(projInput_[i], getInput(0).value->getHeight(),
               getInput(0).value->getWidth(), useGpu_);
  }

  for (size_t i = 0; i < pyramidHeight_; i++) {
    poolProjections_[i]->forward(&projInput_[i], &projOutput_[i], passType);
  }
}
void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) {
for (size_t i = 0; i < pyramidHeight_; i++) {
if (poolProjections_[i]) {
poolProjections_[i]->backward(callback);
getInput(0).grad->add(*projInput_[i].grad);
}
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "PoolProjection.h"
#include "paddle/utils/Logging.h"
namespace paddle {
/**
 * Spatial pyramid pooling layer.
 *
 * The single input is pooled once per pyramid level by a PoolProjection,
 * and the per-level results are placed next to each other in the output
 * columns.
 */
class SpatialPyramidPoolLayer : public Layer {
protected:
  // Geometry of the input, read from SppConfig in init().
  size_t channels_;
  size_t imgSizeW_;
  size_t imgSizeH_;
  // Number of pyramid levels.
  size_t pyramidHeight_;
  // Total output width: the sum of all projections' output sizes.
  size_t outputSize_;
  // Pool type string passed on to every projection's PoolConfig.
  std::string poolType_;
  // One projection per pyramid level.
  std::vector<std::unique_ptr<PoolProjection>> poolProjections_;
  // Per-level projection input and output arguments.
  std::vector<Argument> projInput_;
  std::vector<Argument> projOutput_;
  // Per-level half-open [start, end) column range inside the layer output.
  std::vector<std::pair<size_t, size_t>> projCol_;

public:
  explicit SpatialPyramidPoolLayer(const LayerConfig& config) : Layer(config) {}
  ~SpatialPyramidPoolLayer() {}

  /** Reads the SppConfig and creates one projection per pyramid level. */
  virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /**
   * Builds the pooling projection config for one pyramid level.
   * Parameter names match the definition; they are not members.
   */
  ProjectionConfig getConfig(size_t imgSizeW, size_t imgSizeH, size_t channels,
                             size_t pyramidLevel, std::string& poolType);

  /**
   * Output extent of a pooling window sliding over one image dimension:
   * (imageSize - windowSize + 2 * padding) / stride + 1, using integer
   * division.
   */
  int outputSize(int imageSize, int windowSize, int padding, int stride) {
    return (imageSize - windowSize + 2 * padding) / stride + 1;
  }

  /** Runs all projections and fills the layer output. */
  virtual void forward(PassType passType);

  /** Runs all projections backward and accumulates the input gradient. */
  virtual void backward(const UpdateCallback& callback = nullptr);

  /** Prepares the input argument passed to one level's projection. */
  void splitInput(Argument& input, size_t height, size_t width, bool useGpu);
};
} // namespace paddle
...@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <vector>
#include <string> #include <string>
#include "paddle/gserver/layers/DataLayer.h" #include <vector>
#include "ModelConfig.pb.h" #include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h" #include "paddle/trainer/Trainer.h"
#include "TestUtil.h"
#include "LayerGradUtil.h" #include "LayerGradUtil.h"
#include "TestUtil.h"
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
using namespace std; // NOLINT using namespace std; // NOLINT
...@@ -880,6 +880,32 @@ TEST(Layer, PoolLayer) { ...@@ -880,6 +880,32 @@ TEST(Layer, PoolLayer) {
#endif #endif
} }
void testSppLayer(const string& poolType, const int pyramidHeight, bool trans,
bool useGpu) {
TestConfig config;
config.layerConfig.set_type("spp");
config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
SppConfig* sppConfig = input->mutable_spp_conf();
sppConfig->set_pool_type(poolType);
sppConfig->set_pyramid_height(pyramidHeight);
sppConfig->set_channels(16);
sppConfig->set_img_size(10);
sppConfig->set_img_size_y(20);
testLayerGrad(config, "spp", 100, trans, useGpu);
}
// Gradient-checks the spp layer for both pool types at pyramid heights
// 1, 3 and 5, on CPU and GPU. Iterating over the full pool type x height
// grid keeps every combination covered; the hand-written list previously
// repeated ("avg", 5) and never exercised ("max", 5).
TEST(Layer, SpatialPyramidPoolLayer) {
  for (auto useGpu : {false, true}) {
    for (const char* poolType : {"avg", "max"}) {
      for (int pyramidHeight : {1, 3, 5}) {
        testSppLayer(poolType, pyramidHeight, false, useGpu);
      }
    }
  }
}
TEST(Layer, rankCostLayer) { TEST(Layer, rankCostLayer) {
TestConfig config; TestConfig config;
config.layerConfig.set_type("rank-cost"); config.layerConfig.set_type("rank-cost");
......
此差异已折叠。
...@@ -120,6 +120,14 @@ message PoolConfig { ...@@ -120,6 +120,14 @@ message PoolConfig {
optional uint32 padding_y = 13 [default = 0]; optional uint32 padding_y = 13 [default = 0];
} }
// Configuration of one input of the spatial pyramid pooling ("spp") layer.
message SppConfig {
// Pool type forwarded to each level's PoolConfig; PoolProjection::create
// accepts "max" and "avg".
required string pool_type = 1;
// Number of pyramid levels; level i (zero-based) uses 2^i bins per
// image dimension.
required uint32 pyramid_height = 2;
// Number of input channels.
required uint32 channels = 3;
// Input image width.
required uint32 img_size = 4;
// Input image height; the layer falls back to img_size when unset.
optional uint32 img_size_y = 5;
}
message NormConfig { message NormConfig {
// rnorm or cmrnorm // rnorm or cmrnorm
required string norm_type = 1; required string norm_type = 1;
...@@ -194,6 +202,9 @@ message ProjectionConfig { ...@@ -194,6 +202,9 @@ message ProjectionConfig {
optional ConvConfig conv_conf = 8; optional ConvConfig conv_conf = 8;
optional int32 num_filters = 9; optional int32 num_filters = 9;
// For pool
optional PoolConfig pool_conf = 10;
// For IdentityOffsetProjection // For IdentityOffsetProjection
optional uint64 offset = 11 [default = 0]; optional uint64 offset = 11 [default = 0];
} }
...@@ -235,6 +246,7 @@ message LayerInputConfig { ...@@ -235,6 +246,7 @@ message LayerInputConfig {
// Set the argument name. // Set the argument name.
optional string input_layer_argument = 9; optional string input_layer_argument = 9;
optional MaxOutConfig maxout_conf = 10; optional MaxOutConfig maxout_conf = 10;
optional SppConfig spp_conf = 11;
} }
message LayerConfig { message LayerConfig {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册