提交 ae7452f4 编写于 作者: Y Yu Yang

Merge branch 'develop' of github.com:baidu/Paddle into feature/fix_pydataprovider_multiple_obj_bugs

......@@ -465,6 +465,11 @@ SumOfSquaresCostLayer
.. doxygenclass:: paddle::SumOfSquaresCostLayer
:members:
SumCostLayer
`````````````````````
.. doxygenclass:: paddle::SumCostLayer
:members:
CosSimLayer
-----------
.. doxygenclass:: paddle::CosSimLayer
......
......@@ -46,6 +46,12 @@ conv_operator
:members: conv_operator
:noindex:
conv_projection
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: conv_projection
:noindex:
conv_shift_layer
------------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -73,6 +79,12 @@ img_pool_layer
:members: img_pool_layer
:noindex:
spp_layer
--------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: spp_layer
:noindex:
maxout_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -254,6 +266,12 @@ expand_layer
:members: expand_layer
:noindex:
repeat_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: repeat_layer
:noindex:
Math Layers
===========
......@@ -401,6 +419,12 @@ hsigmoid
:members: hsigmoid
:noindex:
sum_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: sum_cost
:noindex:
Check Layer
============
......
......@@ -91,6 +91,7 @@ extern void hl_expand_feature2col(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples.
*
*/
extern void hl_maxpool_forward(
......@@ -100,7 +101,8 @@ extern void hl_maxpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride);
/**
* @brief Maximum pool backward.
......@@ -123,6 +125,7 @@ extern void hl_maxpool_forward(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] targetGrad output grad.
* @param[in] outStride stride between output data samples.
*
*/
extern void hl_maxpool_backward(
......@@ -135,7 +138,7 @@ extern void hl_maxpool_backward(
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad);
real* targetGrad, const int outStride);
/**
* @brief Average pool forward.
......@@ -154,6 +157,7 @@ extern void hl_maxpool_backward(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples.
*
*/
extern void hl_avgpool_forward(
......@@ -163,7 +167,8 @@ extern void hl_avgpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride);
/**
* @brief Average pool backward.
......@@ -184,6 +189,7 @@ extern void hl_avgpool_forward(
* @param[in] scaleA scale.
* @param[in] scaleB scale.
* @param[out] backGrad output grad.
* @param[in] outStride stride between output data samples.
*
*/
extern void hl_avgpool_backward(
......@@ -195,7 +201,7 @@ extern void hl_avgpool_backward(
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad);
real* backGrad, const int outStride);
/**
* @brief Cross-map-response normalize forward.
......
......@@ -44,7 +44,8 @@ inline void hl_maxpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {}
inline void hl_maxpool_backward(
const int frameCnt, const real* inputData,
......@@ -56,7 +57,7 @@ inline void hl_maxpool_backward(
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {}
real* targetGrad, const int outStride) {}
inline void hl_avgpool_forward(
const int frameCnt, const real* inputData,
......@@ -65,7 +66,8 @@ inline void hl_avgpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {}
inline void hl_avgpool_backward(
const int frameCnt, const real* outGrad,
......@@ -76,7 +78,7 @@ inline void hl_avgpool_backward(
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad) {}
real* backGrad, const int outStride) {}
inline void hl_CMRNorm_forward(
size_t frameCnt, const real* in, real* scale, real* out,
......
......@@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
const int ksizeW, const int ksizeH,
const int strideH, const int strideW,
const int offsetH, const int offsetW,
real* tgtData) {
real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
......@@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
maxval = inputData[h * width + w];
}
}
tgtData[index] = maxval;
int tgtIndex = index % (pooledW * pooledH * channels) +
frameNum * tgtStride;
tgtData[tgtIndex] = maxval;
}
}
......@@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real* tgtData) {
real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
......@@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels, height, width,
pooledH, pooledW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData);
paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_maxpool_forward failed");
}
......@@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
real* targetGrad) {
real* targetGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
// find out the local index
......@@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
real input = inputData[index];
outData += (frameNum * channels + offsetC) * pooledH * pooledW;
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
outData += (frameNum * outStride + offsetC * pooledH * pooledW);
outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
if (input == outData[ph * pooledW + pw]) {
......@@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {
real* targetGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
......@@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
targetGrad);
targetGrad, outStride);
CHECK_SYNC("hl_maxpool_backward");
}
......@@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int padH, const int padW,
real* tgtData) {
real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
......@@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
aveval += inputData[h * width + w];
}
}
tgtData[index] = aveval / pool_size;
int tgtIndex = index % (pooledW * pooledH * channels) +
frameNum * tgtStride;
tgtData[tgtIndex] = aveval / pool_size;
}
}
......@@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels,
height, width, pooledH, pooledW,
sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData);
paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_avgpool_forward failed");
}
......@@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
real* tgtGrad) {
real* tgtGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int offsetW = index % width + padW;
......@@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0;
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
......@@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* backGrad) {
real* backGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
......@@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
backGrad);
backGrad, outStride);
CHECK_SYNC("hl_avgpool_backward failed");
}
......
......@@ -562,4 +562,39 @@ void HuberTwoClass::backwardImpIn(
}
}
/**
* This cost layer compute the sum of its input as loss.
* \f[
* o(i) = \sum_{j=1}^D y_{ij}
* \f]
*/
class SumCostLayer : public Layer {
public:
explicit SumCostLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
bool ret = Layer::init(layerMap, parameterMap);
if (!ret) return ret;
CHECK_EQ(inputLayers_.size(), 1UL);
return true;
}
virtual void forward(PassType passType) {
Layer::forward(passType);
const MatrixPtr& input = getInputValue(0);
/* malloc memory for the output_ if necessary */
int batchSize = input->getHeight();
int size = 1;
resizeOutput(batchSize, size);
output_.value->sumRows(*input);
}
virtual void backward(const UpdateCallback& callback = nullptr) {
getInputGrad(0)->add((real)1);
}
};
REGISTER_LAYER(sum_cost, SumCostLayer);
} // namespace paddle
......@@ -129,7 +129,7 @@ protected:
* This cost layer computes Euclidean (L2) loss for real-valued regression
* tasks.
* \f[
* L = \frac{1}{2N} \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
* L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
* \f]
*/
class SumOfSquaresCostLayer : public CostLayer {
......
......@@ -52,10 +52,8 @@ bool PoolLayer::init(const LayerMap& layerMap,
Layer* PoolLayer::create(const LayerConfig& config) {
CHECK_EQ(config.inputs_size(), 1);
const std::string& pool = config.inputs(0).pool_conf().pool_type();
if (pool == "max-projection") {
return new MaxPoolProjectionLayer(config);
} else if (pool == "avg-projection") {
return new AvgPoolProjectionLayer(config);
if (pool == "max-projection" || pool == "avg-projection") {
return new PoolProjectionLayer(config);
#ifndef PADDLE_ONLY_CPU
} else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config);
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PoolProjection.h"
namespace paddle {
REGISTER_PROJECTION_CREATE_FUNC(pool, &PoolProjection::create);
/**
 * Caches the pooling geometry from the projection's PoolConfig.
 * Every *_y value is optional in the config; when it is absent the matching
 * x value is reused.
 */
PoolProjection::PoolProjection(const ProjectionConfig& config,
                               ParameterPtr parameter, bool useGpu)
    : Projection(config, parameter, useGpu) {
  const PoolConfig& poolConf = config_.pool_conf();

  poolType_ = poolConf.pool_type();
  channels_ = poolConf.channels();

  // Values along x, always read from the config.
  imgSize_ = poolConf.img_size();
  sizeX_ = poolConf.size_x();
  stride_ = poolConf.stride();
  confPadding_ = poolConf.padding();
  outputX_ = poolConf.output_x();

  // Values along y, falling back to the x counterpart when unset.
  if (poolConf.has_img_size_y()) {
    imgSizeY_ = poolConf.img_size_y();
  } else {
    imgSizeY_ = poolConf.img_size();
  }
  if (poolConf.has_size_y()) {
    sizeY_ = poolConf.size_y();
  } else {
    sizeY_ = poolConf.size_x();
  }
  if (poolConf.has_stride_y()) {
    strideY_ = poolConf.stride_y();
  } else {
    strideY_ = poolConf.stride();
  }
  if (poolConf.has_padding_y()) {
    confPaddingY_ = poolConf.padding_y();
  } else {
    confPaddingY_ = poolConf.padding();
  }
  if (poolConf.has_output_y()) {
    outputY_ = poolConf.output_y();
  } else {
    outputY_ = poolConf.output_x();
  }
}
size_t PoolProjection::getSize() {
imgSizeY_ = in_->getFrameHeight();
imgSize_ = in_->getFrameWidth();
const PoolConfig& conf = config_.pool_conf();
if (imgSizeY_ == 0) {
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
}
if (imgSize_ == 0) {
imgSize_ = conf.img_size();
}
outputY_ = outputSize(imgSizeY_, sizeY_, confPaddingY_, strideY_,
/* caffeMode */ false);
outputX_ = outputSize(imgSize_, sizeX_, confPadding_, stride_,
/* caffeMode */ false);
const_cast<Argument*>(out_)->setFrameHeight(outputY_);
const_cast<Argument*>(out_)->setFrameWidth(outputX_);
return outputY_ * outputX_ * channels_;
}
/**
 * Factory: builds the concrete pooling projection named by
 * config.pool_conf().pool_type().
 *
 * @return a newly allocated MaxPoolProjection or AvgPoolProjection; the
 *         caller takes ownership. An unknown type is reported through
 *         LOG(FATAL).
 */
PoolProjection* PoolProjection::create(const ProjectionConfig& config,
                                       ParameterPtr parameter, bool useGpu) {
  const std::string& pool = config.pool_conf().pool_type();

  if (pool == "max-projection") {
    return new MaxPoolProjection(config, parameter, useGpu);
  }
  if (pool == "avg-projection") {
    return new AvgPoolProjection(config, parameter, useGpu);
  }

  LOG(FATAL) << "Unknown pool type: " << pool;
  return nullptr;
}
void MaxPoolProjection::forward() {
size_t width = getSize();
CHECK_EQ(width, out_->value->getWidth());
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputY_, outputX_, confPaddingY_,
confPadding_);
}
void MaxPoolProjection::backward(const UpdateCallback& callback) {
(void)callback;
MatrixPtr outGrad = out_->grad;
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
MatrixPtr inputGrad = in_->grad;
if (NULL == inputGrad) {
return;
}
inputGrad->maxPoolBackward(*inputV, imgSizeY_, imgSize_, *outGrad, *outV,
sizeX_, sizeY_, strideY_, stride_, outputY_,
outputX_, 1, 1, confPaddingY_, confPadding_);
}
void AvgPoolProjection::forward() {
size_t width = getSize();
CHECK_EQ(width, out_->value->getWidth());
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputY_, outputX_, confPaddingY_,
confPadding_);
}
void AvgPoolProjection::backward(const UpdateCallback& callback) {
(void)callback;
MatrixPtr outputGrad = out_->grad;
MatrixPtr inputGrad = in_->grad;
if (NULL == inputGrad) {
return;
}
inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_, sizeX_, sizeY_,
strideY_, stride_, outputY_, outputX_, 1, 1,
confPaddingY_, confPadding_);
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Projection.h"
#include "paddle/math/MathUtils.h"
namespace paddle {
/**
 * Common base of the pooling projections. It caches the pooling geometry read
 * from the PoolConfig and provides the factory that selects the concrete
 * max/avg implementation.
 */
class PoolProjection : public Projection {
protected:
  // Input image height / width.
  size_t imgSizeY_;
  size_t imgSize_;
  // Pooled output height / width.
  size_t outputY_;
  size_t outputX_;
  // Stride along y / x.
  size_t strideY_;
  size_t stride_;
  // Pooling window size along y / x.
  size_t sizeY_;
  size_t sizeX_;
  // Configured padding along y / x.
  int confPaddingY_;
  int confPadding_;
  size_t channels_;
  std::string poolType_;

public:
  PoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
                 bool useGpu);

  /// Creates the projection matching config.pool_conf().pool_type().
  static PoolProjection* create(const ProjectionConfig& config,
                                ParameterPtr parameter, bool useGpu);

  const std::string& getPoolType() const { return poolType_; }

  /// Recomputes the output dimensions and returns the output width.
  size_t getSize();
};
/**
 * Pooling projection whose forward/backward passes call the matrix
 * maxPoolForward / maxPoolBackward routines.
 */
class MaxPoolProjection : public PoolProjection {
public:
MaxPoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
bool useGpu)
: PoolProjection(config, parameter, useGpu) {}
// Max-pools the input value into the output value.
virtual void forward();
// Propagates the output gradient to the input gradient; callback is unused.
virtual void backward(const UpdateCallback& callback = nullptr);
};
/**
 * Pooling projection whose forward/backward passes call the matrix
 * avgPoolForward / avgPoolBackward routines.
 */
class AvgPoolProjection : public PoolProjection {
public:
AvgPoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
bool useGpu)
: PoolProjection(config, parameter, useGpu) {}
// Average-pools the input value into the output value.
virtual void forward();
// Propagates the output gradient to the input gradient; callback is unused.
virtual void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
......@@ -18,6 +18,7 @@ limitations under the License. */
namespace paddle {
size_t PoolProjectionLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
......@@ -37,74 +38,23 @@ size_t PoolProjectionLayer::getSize() {
layerSize = outputH_ * outputW_ * channels_;
getOutput().setFrameHeight(outputH_);
getOutput().setFrameWidth(outputW_);
return layerSize;
}
void MaxPoolProjectionLayer::forward(PassType passType) {
Layer::forward(passType);
/* malloc memory for the output_ if necessary */
/* note: one sample correspond to one ROW */
MatrixPtr input = getInputValue(0);
int batchSize = input->getHeight();
int size = getSize();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, confPaddingY_,
confPadding_);
}
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
/* Do derivation */
MatrixPtr outGrad = getOutputGrad();
MatrixPtr inputV = getInputValue(0);
MatrixPtr outV = getOutputValue();
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, sizeY_, strideY_, stride_, outputH_,
outputW_, 1, 1, confPaddingY_, confPadding_);
}
void AvgPoolProjectionLayer::forward(PassType passType) {
void PoolProjectionLayer::forward(PassType passType) {
Layer::forward(passType);
/* malloc memory for the output_ if necessary */
/* note: one sample correspond to one ROW */
MatrixPtr input = getInputValue(0);
int batchSize = input->getHeight();
const Argument& in = getInput(0);
int batchSize = in.value->getHeight();
int size = getSize();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, confPaddingY_,
confPadding_);
poolProjection_->forward(&in, &output_, passType);
}
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
void PoolProjectionLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
/* Do derivation */
MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
poolProjection_->backward(callback);
}
} // namespace paddle
......@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "PoolLayer.h"
#include "PoolProjection.h"
#include "paddle/math/Matrix.h"
#include <vector>
namespace paddle {
/**
......@@ -27,33 +27,18 @@ class PoolProjectionLayer : public PoolLayer {
protected:
size_t imgSizeH_, imgSizeW_;
size_t outputH_, outputW_;
std::unique_ptr<PoolProjection> poolProjection_;
ProjectionConfig projectionConfig_;
public:
size_t getSize();
explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {}
};
/**
* @brief A layer for max pooling
*/
class MaxPoolProjectionLayer : public PoolProjectionLayer {
public:
explicit MaxPoolProjectionLayer(const LayerConfig& config)
: PoolProjectionLayer(config) {}
~MaxPoolProjectionLayer() {}
explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {
PoolConfig* conf = projectionConfig_.mutable_pool_conf();
*conf = config_.inputs(0).pool_conf();
poolProjection_.reset(
PoolProjection::create(projectionConfig_, nullptr, useGpu_));
}
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
};
/**
* @brief A layer for average pooling
*/
class AvgPoolProjectionLayer : public PoolProjectionLayer {
public:
explicit AvgPoolProjectionLayer(const LayerConfig& config)
: PoolProjectionLayer(config) {}
~AvgPoolProjectionLayer() {}
size_t getSize();
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
......
......@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/parameter/Parameter.h"
#include "ModelConfig.pb.h"
#include "Layer.h"
#include "ModelConfig.pb.h"
#include "paddle/parameter/Parameter.h"
namespace paddle {
......@@ -28,6 +27,11 @@ namespace paddle {
Projection::registrar_.registerClass<__class_name>(#__type_name); \
})
/**
 * Registers a projection type under the name #__type_name using a custom
 * creation function instead of a class name.
 *
 * Expands to a static InitFunction object named __reg_type_<__type_name>
 * whose lambda calls Projection::registrar_.registerClass(name,
 * createFunction).
 * NOTE(review): when the lambda actually runs depends on InitFunction, which
 * is not visible here - presumably during start-up initialisation; confirm.
 */
#define REGISTER_PROJECTION_CREATE_FUNC(__type_name, createFunction) \
static InitFunction __reg_type_##__type_name([]() { \
Projection::registrar_.registerClass(#__type_name, createFunction); \
})
/**
* A projection takes one Argument as input, calculate the result and add it
* to output Argument.
......@@ -50,7 +54,8 @@ public:
registrar_;
/**
* Forward propagation. If backward() will be called, in and out must be kept valid until then.
* Forward propagation. If backward() will be called, in and out must be kept
* valid until then.
* @param in input of projection
* @param out output of projection
* @param passType PASS_TRAIN or PASS_TEST
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "SpatialPyramidPoolLayer.h"
namespace paddle {
REGISTER_LAYER(spp, SpatialPyramidPoolLayer);
/**
 * Builds the "pool" ProjectionConfig for one pyramid level.
 *
 * The level is split into 2^pyramidLevel bins per axis. The window size (also
 * used as the stride) is ceil(imgSize / bins), and the padding is chosen so
 * that the bins cover the whole image.
 *
 * @param imgSizeW     input image width.
 * @param imgSizeH     input image height.
 * @param channels     number of input channels.
 * @param pyramidLevel zero-based pyramid level.
 * @param poolType     pooling type string stored in the config.
 * @return the populated projection config, including its output size.
 */
ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
                                                    size_t imgSizeH,
                                                    size_t channels,
                                                    size_t pyramidLevel,
                                                    std::string& poolType) {
  int binsPerAxis = std::pow(2, pyramidLevel);

  // Vertical geometry.
  int windowH = std::ceil(imgSizeH / static_cast<double>(binsPerAxis));
  int padH = (windowH * binsPerAxis - imgSizeH + 1) / 2;
  int pooledH = outputSize(imgSizeH, windowH, padH, windowH, true);

  // Horizontal geometry.
  int windowW = std::ceil(imgSizeW / static_cast<double>(binsPerAxis));
  int padW = (windowW * binsPerAxis - imgSizeW + 1) / 2;
  int pooledW = outputSize(imgSizeW, windowW, padW, windowW, true);

  ProjectionConfig projConf;
  projConf.set_type("pool");
  projConf.set_output_size(pooledH * pooledW * channels);

  PoolConfig* poolConf = projConf.mutable_pool_conf();
  poolConf->set_pool_type(poolType);
  poolConf->set_channels(channels);
  poolConf->set_img_size(imgSizeW);
  poolConf->set_img_size_y(imgSizeH);
  poolConf->set_size_x(windowW);
  poolConf->set_size_y(windowH);
  poolConf->set_stride(windowW);
  poolConf->set_stride_y(windowH);
  poolConf->set_padding(padW);
  poolConf->set_padding_y(padH);
  poolConf->set_output_x(pooledW);
  poolConf->set_output_y(pooledH);

  return projConf;
}
/**
 * Refreshes the cached input image size and returns the layer output width.
 *
 * The image size comes from the input layer's frame height/width; a value of
 * 0 falls back to the SppConfig. The output has one row of
 * (4^pyramidHeight_ - 1) / 3 bins per channel, i.e. the sum of 4^level bins
 * over all pyramid levels.
 *
 * @return number of output columns (bins * channels_).
 */
size_t SpatialPyramidPoolLayer::getSize() {
  CHECK_EQ(inputLayers_.size(), 1UL);
  const SppConfig& sppConf = config_.inputs(0).spp_conf();

  imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
  imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();

  // Resolve the width first: the height fallback below reuses imgSizeW_ when
  // img_size_y is not configured, so it must not see an unresolved 0 width.
  if (imgSizeW_ == 0) {
    imgSizeW_ = sppConf.img_size();
  }
  if (imgSizeH_ == 0) {
    imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
  }

  size_t outputH = 1;
  size_t outputW = (std::pow(4, pyramidHeight_) - 1) / (4 - 1);

  return outputH * outputW * channels_;
}
/**
 * Reads the SppConfig and creates one pooling projection per pyramid level.
 *
 * For each level the half-open column range [startCol, endCol) that the
 * level occupies in the layer output is recorded in projCol_. The total
 * width must equal getSize().
 *
 * @return false if the base-class initialisation failed, true otherwise.
 */
bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
                                   const ParameterMap& parameterMap) {
  // Propagate a base-class failure instead of continuing on a layer that was
  // not initialised.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }
  CHECK_EQ(config_.inputs_size(), 1);

  const SppConfig& sppConf = config_.inputs(0).spp_conf();
  pyramidHeight_ = sppConf.pyramid_height();
  poolType_ = sppConf.pool_type();
  channels_ = sppConf.channels();
  imgSizeW_ = sppConf.img_size();
  imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;

  poolProjections_.reserve(pyramidHeight_);
  projCol_.reserve(pyramidHeight_);
  projOutput_.resize(pyramidHeight_);

  size_t startCol = 0;
  size_t endCol = 0;
  for (size_t i = 0; i < pyramidHeight_; i++) {
    poolProjections_.emplace_back(PoolProjection::create(
        getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_), nullptr,
        useGpu_));
    endCol += poolProjections_[i]->getOutputSize();
    projCol_.push_back(std::make_pair(startCol, endCol));
    startCol = endCol;
  }
  CHECK_EQ(endCol, getSize());
  return true;
}
/**
 * Runs every pyramid level's pooling projection on the single input.
 *
 * Each level writes into its own column slice of the layer output: the
 * slices are obtained with subColMatrix over the ranges stored in projCol_.
 */
void SpatialPyramidPoolLayer::forward(PassType passType) {
  Layer::forward(passType);
  int batchSize = getInput(0).getBatchSize();
  resetOutput(batchSize, getSize());

  for (size_t i = 0; i < pyramidHeight_; i++) {
    size_t startCol = projCol_[i].first;
    size_t endCol = projCol_[i].second;
    projOutput_[i].value = output_.value->subColMatrix(startCol, endCol);
    // NOTE(review): output_.grad is presumably not allocated when no
    // gradient is needed (e.g. a test pass) - confirm against resetOutput.
    // Guard so that case does not dereference a null matrix.
    if (output_.grad) {
      projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol);
    }
  }

  for (size_t i = 0; i < pyramidHeight_; i++) {
    poolProjections_[i]->forward(&getInput(0), &projOutput_[i], passType);
  }
}
/**
 * Back-propagates through every pyramid level's projection, skipping any
 * level whose projection pointer is empty.
 */
void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) {
  for (size_t level = 0; level < pyramidHeight_; ++level) {
    if (!poolProjections_[level]) {
      continue;
    }
    poolProjections_[level]->backward(callback);
  }
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "PoolProjection.h"
#include "paddle/math/MathUtils.h"
#include "paddle/utils/Logging.h"
namespace paddle {
/**
* @brief A layer for spatial pyramid pooling on the input image by taking
* the max, average, etc. within regions, so that the result vector of
* different sized images are of the same size.
*
* The config file api is spp_layer.
*/
class SpatialPyramidPoolLayer : public Layer {
protected:
  size_t channels_;
  size_t imgSizeW_;
  size_t imgSizeH_;
  /// Number of pyramid levels.
  size_t pyramidHeight_;
  std::string poolType_;

  /// One pooling projection per pyramid level.
  std::vector<std::unique_ptr<PoolProjection>> poolProjections_;
  /// Per-level output arguments; their matrices are column slices of output_.
  std::vector<Argument> projOutput_;
  /// Per-level [startCol, endCol) column range inside the layer output.
  std::vector<std::pair<size_t, size_t>> projCol_;

public:
  explicit SpatialPyramidPoolLayer(const LayerConfig& config) : Layer(config) {}

  ~SpatialPyramidPoolLayer() {}

  virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /**
   * Builds the pooling ProjectionConfig for one pyramid level.
   * Parameter names match the definition; the previous declaration used
   * member-style names that shadowed the class members.
   *
   * @param imgSizeW     input image width.
   * @param imgSizeH     input image height.
   * @param channels     number of input channels.
   * @param pyramidLevel zero-based pyramid level.
   * @param poolType     pooling type string.
   */
  ProjectionConfig getConfig(size_t imgSizeW, size_t imgSizeH, size_t channels,
                             size_t pyramidLevel, std::string& poolType);

  /// Refreshes the cached image size and returns the output width.
  size_t getSize();

  virtual void forward(PassType passType);
  virtual void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
......@@ -13,15 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include "paddle/gserver/layers/DataLayer.h"
#include <vector>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/math/MathUtils.h"
#include "TestUtil.h"
#include "LayerGradUtil.h"
#include "TestUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
......@@ -981,6 +981,32 @@ TEST(Layer, PoolLayer) {
#endif
}
void testSppLayer(const string& poolType, const int pyramidHeight, bool trans,
bool useGpu) {
TestConfig config;
config.layerConfig.set_type("spp");
config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
SppConfig* sppConfig = input->mutable_spp_conf();
sppConfig->set_pool_type(poolType);
sppConfig->set_pyramid_height(pyramidHeight);
sppConfig->set_channels(16);
sppConfig->set_img_size(10);
sppConfig->set_img_size_y(20);
int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1);
config.layerConfig.set_size(outputSize * sppConfig->channels());
testLayerGrad(config, "spp", 100, trans, useGpu);
}
// Runs the spp gradient check on CPU then GPU, for pyramid heights 1..3, with
// average pooling followed by max pooling at each height.
TEST(Layer, SpatialPyramidPoolLayer) {
  for (bool useGpu : {false, true}) {
    for (int pyramidHeight : {1, 2, 3}) {
      for (const char* poolType : {"avg-projection", "max-projection"}) {
        testSppLayer(poolType, pyramidHeight, false, useGpu);
      }
    }
  }
}
TEST(Layer, rankCostLayer) {
TestConfig config;
config.layerConfig.set_type("rank-cost");
......@@ -998,6 +1024,19 @@ TEST(Layer, rankCostLayer) {
}
}
// Gradient-checks the "sum_cost" layer with a single one-dimensional data
// input and no bias, on CPU then GPU.
TEST(Layer, sumCostLayer) {
  TestConfig config;
  config.biasSize = 0;
  config.layerConfig.set_type("sum_cost");
  config.layerConfig.add_inputs();
  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});

  for (bool useGpu : {false, true}) {
    testLayerGrad(config, "sum_cost", 100, false, useGpu);
  }
}
TEST(Layer, weightedRankCostLayer) {
TestConfig config;
config.layerConfig.set_type("rank-cost");
......
......@@ -13,20 +13,20 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "Matrix.h"
#include "MathFunctions.h"
#include "SparseMatrix.h"
#include "SparseRowMatrix.h"
#include "MathFunctions.h"
#include <cmath>
#include <float.h>
#include <algorithm>
#include <cmath>
#include "paddle/utils/Logging.h"
#include <string.h>
#include "hl_cnn.h"
#include "hl_gpu.h"
#include "hl_table_apply.h"
#include "hl_top_k.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/ThreadLocal.h"
......@@ -393,11 +393,11 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
} else {
real* data = getData();
hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(),
width_, scale);
hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale);
}
}
void GpuMatrix::collectSharedBias(Matrix& a, real scale) {
CHECK_EQ(getHeight(), (size_t)1);
CHECK_EQ(a.getWidth() % getWidth(), 0UL);
......@@ -453,8 +453,8 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB,
hl_trans_op_t transa = !a.isTransposed() ? HPPL_OP_N : HPPL_OP_T;
hl_trans_op_t transb = !b.isTransposed() ? HPPL_OP_N : HPPL_OP_T;
hl_matrix_mul(A_d, transa, B_d, transb, C_d, dimM, dimN, dimK,
scaleAB, scaleT, lda, ldb, ldc);
hl_matrix_mul(A_d, transa, B_d, transb, C_d, dimM, dimN, dimK, scaleAB,
scaleT, lda, ldb, ldc);
}
void GpuMatrix::mul(const GpuSparseMatrix& a, const GpuMatrix& b, real scaleAB,
......@@ -475,8 +475,8 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, const GpuMatrix& b, real scaleAB,
hl_sparse_matrix_s A_d = a.sMatrix_.get();
real* B_d = b.data_;
real* C_d = data_;
hl_matrix_csr_mul_dense(A_d, transA, B_d, HPPL_OP_N, C_d, height_,
width_, b.height_, scaleAB, scaleT);
hl_matrix_csr_mul_dense(A_d, transA, B_d, HPPL_OP_N, C_d, height_, width_,
b.height_, scaleAB, scaleT);
}
void GpuMatrix::mul(const GpuMatrix& a, const GpuSparseMatrix& b, real scaleAB,
......@@ -497,11 +497,11 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuSparseMatrix& b, real scaleAB,
<< "Matrix dimensions are not equal";
}
if (b.format_ == SPARSE_CSC) {
hl_matrix_dense_mul_csc(A_d, HPPL_OP_N, B_d, transB, C_d, height_,
width_, a.width_, scaleAB, scaleT);
hl_matrix_dense_mul_csc(A_d, HPPL_OP_N, B_d, transB, C_d, height_, width_,
a.width_, scaleAB, scaleT);
} else {
hl_matrix_dense_mul_csr(A_d, HPPL_OP_N, B_d, transB, C_d, height_,
width_, a.width_, scaleAB, scaleT);
hl_matrix_dense_mul_csr(A_d, HPPL_OP_N, B_d, transB, C_d, height_, width_,
a.width_, scaleAB, scaleT);
}
}
......@@ -563,8 +563,8 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
size_t tableSize = table.getHeight();
int* index = ids.getData();
hl_matrix_select_rows(a, stride_, table.getData(), table.stride_,
index, numSamples, tableSize, dim);
hl_matrix_select_rows(a, stride_, table.getData(), table.stride_, index,
numSamples, tableSize, dim);
#endif
}
......@@ -581,8 +581,8 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
size_t tableSize = table.getHeight();
int* index = ids.getData();
hl_matrix_add_to_rows(table.getData(), table.stride_, a, stride_,
index, numSamples, tableSize, dim);
hl_matrix_add_to_rows(table.getData(), table.stride_, a, stride_, index,
numSamples, tableSize, dim);
#endif
}
......@@ -617,13 +617,8 @@ void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
CHECK_EQ(maxIds.getSize(), numSamples * beam);
CHECK_EQ(maxVal.getHeight(), numSamples);
hl_matrix_top_k(maxVal.getData(),
maxVal.getStride(),
maxIds.getData(),
this->getData(),
this->getStride(),
this->getWidth(),
beam,
hl_matrix_top_k(maxVal.getData(), maxVal.getStride(), maxIds.getData(),
this->getData(), this->getStride(), this->getWidth(), beam,
numSamples);
#endif
}
......@@ -651,8 +646,8 @@ void GpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels,
real* output = getData();
int* idForGpu = id.getData();
hl_maxout_forward(input, output, idForGpu, batchSize, size,
size / channels, groups);
hl_maxout_forward(input, output, idForGpu, batchSize, size, size / channels,
groups);
}
void GpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels,
......@@ -667,8 +662,8 @@ void GpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels,
const real* output = a.getData();
const int* idForGpu = id.getData();
hl_maxout_backward(input, output, idForGpu, batchSize, size,
size / channels, groups);
hl_maxout_backward(input, output, idForGpu, batchSize, size, size / channels,
groups);
}
/*calulate the error of classification */
......@@ -684,8 +679,8 @@ void GpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) {
real* recResult_d = data_;
int* label_d = label_ptr->getData();
hl_matrix_classification_error(output_d, label_d, recResult_d,
height_, output_ptr->width_);
hl_matrix_classification_error(output_d, label_d, recResult_d, height_,
output_ptr->width_);
}
/* copy -log(output[i * width + label]) to this->data[i] */
......@@ -754,8 +749,7 @@ void GpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) {
real* outputData = output.getData();
auto starts = index.getData();
int numSequences = index.getSize() - 1;
hl_sequence_softmax_forward(inputData, outputData,
starts, numSequences);
hl_sequence_softmax_forward(inputData, outputData, starts, numSequences);
}
void GpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) {
......@@ -769,8 +763,7 @@ void GpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) {
real* output_d = output.data_;
real* sftmaxSum_d = sftmaxSum.data_;
real* grad_d = data_;
hl_matrix_softmax_derivative(grad_d, output_d, sftmaxSum_d, height_,
width_);
hl_matrix_softmax_derivative(grad_d, output_d, sftmaxSum_d, height_, width_);
}
void GpuMatrix::softmaxBackward(Matrix& outputV) {
......@@ -830,15 +823,15 @@ void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) {
real* out = getData();
real* x = output1.getData();
real* y = output2.getData();
hl_cossim(out, x, y,
dim, output1.getHeight(), output2.getHeight(), scale);
hl_cossim(out, x, y, dim, output1.getHeight(), output2.getHeight(), scale);
}
void GpuMatrix::cosSimDerivative(Matrix& output, Matrix& prevOut1,
Matrix& prevOut2, Matrix& prevGrad1,
Matrix& prevGrad2, real scale) {
CHECK(output.useGpu_ == true && prevOut1.useGpu_ == true &&
prevOut2.useGpu_ == true && prevGrad1.useGpu_ == true &&
prevGrad2.useGpu_ == true) << "Matrix type are not equal";
prevGrad2.useGpu_ == true)
<< "Matrix type are not equal";
CHECK_EQ(getWidth(), 1UL);
CHECK_EQ(output.getWidth(), 1UL);
......@@ -858,8 +851,7 @@ void GpuMatrix::cosSimDerivative(Matrix& output, Matrix& prevOut1,
real* prevOutY = prevOut2.getData();
real* prevGradX = prevGrad1.getData();
real* prevGradY = prevGrad2.getData();
hl_cossim_derivative(grad, out, prevOutX, prevOutY,
prevGradX, prevGradY, dim,
hl_cossim_derivative(grad, out, prevOutX, prevOutY, prevGradX, prevGradY, dim,
prevOut1.getHeight(), prevOut2.getHeight(), scale);
}
......@@ -911,8 +903,8 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) {
void GpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
int channels, int blockH, int blockW, int strideH,
int strideW, int paddingH, int paddingW,
int outputH, int outputW) {
int strideW, int paddingH, int paddingW, int outputH,
int outputW) {
CHECK(feature.useGpu_ == true) << "Matrix type are not equal";
CHECK_EQ(size_t(feaImgHeight * feaImgWidth * channels),
......@@ -922,17 +914,16 @@ void GpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
size_t elemCnt = outputH * outputW * blockH * blockW * channels;
CHECK_EQ(elemCnt, height_ * width_) << "Matrix dimensions are not equal";
hl_expand_feature2col(feature.getData(), channels, feaImgHeight,
feaImgWidth, blockH, blockW, strideH, strideW,
paddingH, paddingW, outputH, outputW,
getData());
hl_expand_feature2col(feature.getData(), channels, feaImgHeight, feaImgWidth,
blockH, blockW, strideH, strideW, paddingH, paddingW,
outputH, outputW, getData());
}
void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
int thisImgWidth, int channels, int blockH,
int blockW, int strideH, int strideW, int paddingH,
int paddingW, int outputH, int outputW,
real alpha, real beta) {
int paddingW, int outputH, int outputW, real alpha,
real beta) {
CHECK(expandFeat.useGpu_ == true) << "Matrix type are not equal";
CHECK_EQ(size_t(thisImgHeight * thisImgWidth * channels),
getHeight() * getWidth())
......@@ -941,18 +932,17 @@ void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
size_t elemCnt = outputH * outputW * blockW * blockH * channels;
CHECK(elemCnt == expandFeat.getHeight() * expandFeat.getWidth())
<< "Matrix dimensions are not equal";
hl_shrink_col2feature(
expandFeat.getData(), channels, thisImgHeight, thisImgWidth, blockH,
blockW, strideH, strideW, paddingH, paddingW, outputH, outputW,
getData(), alpha, beta);
hl_shrink_col2feature(expandFeat.getData(), channels, thisImgHeight,
thisImgWidth, blockH, blockW, strideH, strideW,
paddingH, paddingW, outputH, outputW, getData(), alpha,
beta);
}
void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
size_t imgSizeW, size_t channels, size_t sizeX,
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW, size_t paddingH,
size_t paddingW) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData();
......@@ -963,16 +953,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels);
hl_maxpool_forward(frameNum, inputData, channels, height, width,
outputH, outputW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, data_);
hl_maxpool_forward(frameNum, inputData, channels, height, width, outputH,
outputW, sizeX, sizeY, strideH, strideW, paddingH,
paddingW, data_, getStride());
}
void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, Matrix& outGrad, Matrix& outV,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t sizeX, size_t sizeY, size_t strideH,
size_t strideW, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true &&
......@@ -992,19 +981,17 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH,
CHECK(outGrad.getHeight() == outV.getHeight() &&
outGrad.getWidth() == outV.getWidth());
hl_maxpool_backward(frameNum, inputData, outData, outDiff, channels,
height, width, outputH, outputW, sizeX, sizeY,
strideH, strideW, paddingH, paddingW,
scaleTargets, scaleOutput, data_);
hl_maxpool_backward(frameNum, inputData, outData, outDiff, channels, height,
width, outputH, outputW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, scaleTargets, scaleOutput, data_,
outGrad.getStride());
}
void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
size_t imgSizeW, size_t channels, size_t sizeX,
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW, size_t paddingH,
size_t paddingW) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData();
......@@ -1015,18 +1002,17 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH,
CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels);
hl_avgpool_forward(frameNum, inputData, channels, height, width,
outputH, outputW, sizeX, sizeY,
strideH, strideW,
paddingH, paddingW, data_);
hl_avgpool_forward(frameNum, inputData, channels, height, width, outputH,
outputW, sizeX, sizeY, strideH, strideW, paddingH,
paddingW, data_, getStride());
}
void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH,
size_t imgSizeW, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
size_t strideH, size_t strideW, size_t outputH,
size_t outputW, real scaleTargets,
real scaleOutput, size_t paddingH,
size_t paddingW) {
CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal";
real* outDiff = outGrad.getData();
......@@ -1038,11 +1024,10 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH,
CHECK(height_ == outGrad.getHeight());
CHECK(outGrad.getWidth() == outputH * outputW * channels);
hl_avgpool_backward(frameNum, outDiff, channels, height, width,
outputH, outputW, sizeX, sizeY,
strideH, strideW, paddingH, paddingW,
scaleTargets, scaleOutput,
data_);
hl_avgpool_backward(frameNum, outDiff, channels, height, width, outputH,
outputW, sizeX, sizeY, strideH, strideW, paddingH,
paddingW, scaleTargets, scaleOutput, data_,
outGrad.getStride());
}
void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH,
......@@ -1057,8 +1042,8 @@ void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH,
CHECK(denoms.getHeight() == input.getHeight() &&
denoms.getWidth() == input.getWidth() && input.getHeight() == height_ &&
input.getWidth() == width_);
hl_CMRNorm_forward(num, input.getData(), denoms.getData(), data_,
channels, height, width, sizeX, scale, -pow);
hl_CMRNorm_forward(num, input.getData(), denoms.getData(), data_, channels,
height, width, sizeX, scale, -pow);
}
void GpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms,
......@@ -1078,13 +1063,11 @@ void GpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms,
denoms.getWidth() == localGrad.getWidth());
hl_CMRNorm_backward(num, preOutV.getData(), denoms.getData(),
localOutV.getData(), localGrad.getData(), data_,
channels, height, width, sizeX, -pow,
2.0f * pow * scale);
localOutV.getData(), localGrad.getData(), data_, channels,
height, width, sizeX, -pow, 2.0f * pow * scale);
}
void GpuMatrix::maxSequenceForward(Matrix& input,
const IVector& sequence,
void GpuMatrix::maxSequenceForward(Matrix& input, const IVector& sequence,
IVector& index) {
CHECK(dynamic_cast<GpuMatrix*>(&input));
CHECK(dynamic_cast<const GpuIVector*>(&sequence));
......@@ -1101,12 +1084,11 @@ void GpuMatrix::maxSequenceForward(Matrix& input,
CHECK_EQ(numSequences, sequence.getSize() - 1);
CHECK_EQ(numSequences * dim, index.getSize());
hl_max_sequence_forward(inputData, starts, outData, maxIndex,
numSequences, dim);
hl_max_sequence_forward(inputData, starts, outData, maxIndex, numSequences,
dim);
}
void GpuMatrix::maxSequenceBackward(Matrix& outputGrad,
const IVector& sequence,
void GpuMatrix::maxSequenceBackward(Matrix& outputGrad, const IVector& sequence,
IVector& index) {
CHECK(dynamic_cast<GpuMatrix*>(&outputGrad));
CHECK(dynamic_cast<const GpuIVector*>(&sequence));
......@@ -1163,9 +1145,8 @@ void GpuMatrix::contextProjectionBackwardData(MatrixPtr inputGrad,
real* inGrad = inputGrad->getData();
const int* starts = sequence.getData();
hl_context_projection_backward_data(outGrad, starts, inGrad,
numSequences, inputDim,
contextLength, contextStart);
hl_context_projection_backward_data(outGrad, starts, inGrad, numSequences,
inputDim, contextLength, contextStart);
}
void GpuMatrix::contextProjectionBackwardWeight(MatrixPtr weightGrad,
......@@ -1185,8 +1166,8 @@ void GpuMatrix::contextProjectionBackwardWeight(MatrixPtr weightGrad,
real* wtGrad = weightGrad->getData();
const int* starts = sequence.getData();
hl_context_projection_backward_weight(
outGrad, starts, wtGrad, numSequences, weightDim, totalPad, contextLength,
hl_context_projection_backward_weight(outGrad, starts, wtGrad, numSequences,
weightDim, totalPad, contextLength,
contextStart, beginPad);
}
......@@ -1199,8 +1180,7 @@ void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) {
size_t numSamples = data.getHeight();
size_t partial_sum = numElements / (W.getHeight() * W.getWidth());
real* output = getData();
hl_param_relu_forward(output, input, w, numElements, numSamples,
partial_sum);
hl_param_relu_forward(output, input, w, numElements, numSamples, partial_sum);
}
void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) {
......@@ -1212,8 +1192,8 @@ void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) {
size_t numElements = data.getWidth();
size_t numSamples = data.getHeight();
size_t partial_sum = numElements / (this->getHeight() * this->getWidth());
hl_param_relu_backward_w(wgrad, ograd, input,
numElements, numSamples, partial_sum);
hl_param_relu_backward_w(wgrad, ograd, input, numElements, numSamples,
partial_sum);
}
void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) {
......@@ -1224,8 +1204,8 @@ void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) {
size_t numElements = data.getWidth();
size_t numSamples = data.getHeight();
size_t partial_sum = numElements / (W.getHeight() * W.getWidth());
hl_param_relu_backward_diff(ograd, input, w, diff,
numElements, numSamples, partial_sum);
hl_param_relu_backward_diff(ograd, input, w, diff, numElements, numSamples,
partial_sum);
}
void GpuMatrix::addColumnVector(const Matrix& b) {
......@@ -1571,8 +1551,8 @@ void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) {
void CpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
int channels, int blockH, int blockW, int strideH,
int strideW, int paddingH, int paddingW,
int outputH, int outputW) {
int strideW, int paddingH, int paddingW, int outputH,
int outputW) {
CHECK(feature.useGpu_ == false) << "Matrix type are not equal";
CHECK_EQ(size_t(feaImgHeight * feaImgWidth * channels),
......@@ -1612,8 +1592,8 @@ void CpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
int thisImgWidth, int channels, int blockH,
int blockW, int strideH, int strideW, int paddingH,
int paddingW, int outputH, int outputW,
real alpha, real beta) {
int paddingW, int outputH, int outputW, real alpha,
real beta) {
CHECK(expandFeat.useGpu_ == false) << "Matrix type are not equal";
CHECK_EQ(size_t(thisImgHeight * thisImgWidth * channels),
getHeight() * getWidth())
......@@ -1650,11 +1630,10 @@ void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
}
void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
size_t imgSizeW, size_t channels, size_t sizeX,
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW, size_t paddingH,
size_t paddingW) {
real* inputData = inputMat.getData();
real* outData = data_;
size_t num = inputMat.getHeight();
......@@ -1662,15 +1641,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t inHeight = imgSizeH;
CHECK(inHeight * inWidth == inputMat.getWidth() / channels);
CHECK_EQ(num, this->getHeight());
CHECK_EQ(channels*outputH*outputW, this->getWidth());
CHECK_EQ(channels * outputH * outputW, this->getWidth());
size_t outStride = getStride();
/* initialize the data_ */
for (size_t i = 0; i < height_ * width_; i++) {
outData[i] = -(real)FLT_MAX;
for (size_t i = 0; i < height_; i++) {
for (size_t j = 0; j < width_; j++) {
outData[i * outStride + j] = -(real)FLT_MAX;
}
}
/* pool max one by one */
for (size_t n = 0; n < num; ++n) { // frame by frame
if (!isContiguous()) {
outData = data_ + n * outStride;
}
for (size_t c = 0; c < channels; ++c) { // channel by channel
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
......@@ -1712,7 +1697,16 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
real* inData = image.getData();
real* otData = outV.getData();
real* otGrad = outGrad.getData();
size_t outStride = outV.getStride();
real* origOutData = otData;
real* origOutGrad = otGrad;
for (size_t n = 0; n < num; ++n) {
if (!outV.isContiguous()) {
otData = origOutData + n * outStride;
otGrad = origOutGrad + n * outStride;
}
for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
......@@ -1743,9 +1737,9 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
size_t strideH, size_t strideW, size_t outputH,
size_t outputW, size_t paddingH,
size_t paddingW) {
// The main loop
size_t num = input.getHeight();
size_t inHeight = imgSizeH;
......@@ -1756,6 +1750,9 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
real* inData = input.getData();
for (size_t n = 0; n < num; ++n) {
if (!isContiguous()) {
tgtData = data_ + n * getStride();
}
for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
......@@ -1787,9 +1784,8 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
}
void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t sizeX, size_t sizeY, size_t strideH,
size_t strideW, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
size_t num = input.getHeight();
......@@ -1799,6 +1795,9 @@ void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
real* outData = getData();
for (size_t n = 0; n < num; ++n) {
if (!input.isContiguous()) {
inData = input.getData() + n * input.getStride();
}
for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
......@@ -1901,8 +1900,7 @@ void CpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms,
* Output: output size is the number of input sequences (NOT input instances).
* output[i] is set to max_{for each instance in this sequence}{input[i]}
*/
void CpuMatrix::maxSequenceForward(Matrix& input,
const IVector& sequence,
void CpuMatrix::maxSequenceForward(Matrix& input, const IVector& sequence,
IVector& index) {
CHECK(dynamic_cast<CpuMatrix*>(&input));
CHECK(dynamic_cast<const CpuIVector*>(&sequence));
......@@ -1943,8 +1941,7 @@ void CpuMatrix::maxSequenceForward(Matrix& input,
}
}
void CpuMatrix::maxSequenceBackward(Matrix& outputGrad,
const IVector& sequence,
void CpuMatrix::maxSequenceBackward(Matrix& outputGrad, const IVector& sequence,
IVector& index) {
CHECK(dynamic_cast<CpuMatrix*>(&outputGrad));
CHECK(dynamic_cast<const CpuIVector*>(&sequence));
......
......@@ -378,7 +378,7 @@ hl_activation_mode_t hlActiveType(const std::string& type) {
return HL_ACTIVATION_RELU;
} else if (type == "tanh") {
return HL_ACTIVATION_TANH;
} else if (type == "linear") {
} else if (type == "linear" || type == "") {
return HL_ACTIVATION_LINEAR;
} else {
LOG(FATAL) << "Do not support activation type " << type;
......
......@@ -120,6 +120,14 @@ message PoolConfig {
optional uint32 padding_y = 13 [default = 0];
}
// Per-input configuration of a spatial pyramid pooling ("spp") layer.
message SppConfig {
// Pooling type. The Python config parser (parse_spp) only accepts
// 'max-projection' or 'avg-projection'.
required string pool_type = 1;
// Height of the pooling pyramid. The layer size is derived from it as
// ((4^pyramid_height - 1) / 3) * channels.
required uint32 pyramid_height = 2;
// Number of channels of the input feature map.
required uint32 channels = 3;
// Input image size. parse_spp fills it from the user-supplied img_width, or
// from the square root of the per-channel pixel count when that is unset.
required uint32 img_size = 4;
// Second input image dimension. parse_spp sets it to the per-channel pixel
// count divided by img_size.
optional uint32 img_size_y = 5;
}
message NormConfig {
// rnorm or cmrnorm
required string norm_type = 1;
......@@ -196,6 +204,9 @@ message ProjectionConfig {
// For IdentityOffsetProjection
optional uint64 offset = 11 [default = 0];
// For pool
optional PoolConfig pool_conf = 12;
}
message OperatorConfig {
......@@ -245,6 +256,7 @@ message LayerInputConfig {
optional string input_layer_argument = 9;
optional BilinearInterpConfig bilinear_interp_conf = 10;
optional MaxOutConfig maxout_conf = 11;
optional SppConfig spp_conf = 12;
}
message LayerConfig {
......
......@@ -218,7 +218,7 @@ def Inputs(*args):
@config_func
def HasInputsSet():
return len(g_config.model_config.input_layer_names) != 0
return len(g_current_submodel.input_layer_names) != 0
# Define the name of the output layers of the NeuralNetwork.
......@@ -471,6 +471,7 @@ class Input(Cfg):
image=None,
block_expand=None,
maxout=None,
spp=None,
format=None,
nnz=None,
is_static=None,
......@@ -671,7 +672,6 @@ class ConvProjection(Projection):
def calc_parameter_dims(self, input_size, output_size):
return None
# Define a operator for mixed layer
@config_class
class Operator(Cfg):
......@@ -796,6 +796,17 @@ class Pool(Cfg):
padding_y = None):
self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto
# Config holder describing the spatial pyramid pooling settings of one layer
# input. The constructor stores its arguments through add_keys(locals()), so
# no extra local variable may be introduced before that call.
#   pool_type:      pooling type string (checked later by parse_spp).
#   pyramid_height: height of the pooling pyramid.
#   channels:       number of channels of the input feature map.
#   img_width:      optional input width; when None, parse_spp falls back to
#                   the square root of the per-channel pixel count.
@config_class
class SpatialPyramidPool(Cfg):
def __init__(
self,
pool_type,
pyramid_height,
channels,
img_width = None):
self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto
@config_class
class Norm(Cfg):
......@@ -1081,6 +1092,22 @@ def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
pool_conf.padding_y, pool_conf.stride_y, False)
# Copy the user-facing spp settings into the spp_conf proto message and derive
# the input image dimensions from the size of the input layer.
#   spp:              object providing pool_type, pyramid_height, channels and
#                     img_width attributes.
#   input_layer_name: key into g_layer_map for the layer feeding this input.
#   spp_conf:         message that receives the parsed values.
# Fails through config_assert when the pool type is unsupported or when
# img_size * img_size_y does not reproduce the per-channel pixel count.
def parse_spp(spp, input_layer_name, spp_conf):
spp_conf.pool_type = spp.pool_type
# Only the two projection-style pool types are accepted.
config_assert(spp.pool_type in ['max-projection', 'avg-projection'],
"pool-type %s is not in " "['max-projection', 'avg-projection']"
% spp.pool_type)
spp_conf.pyramid_height = spp.pyramid_height
spp_conf.channels = spp.channels
# Pixels per channel of the input layer.
# NOTE(review): relies on '/' being integer division (Python 2) - confirm.
img_pixels = g_layer_map[input_layer_name].size / spp_conf.channels
# Use the explicit width when given, otherwise assume a square image.
spp_conf.img_size = default(spp.img_width, int(img_pixels ** 0.5))
spp_conf.img_size_y = img_pixels / spp_conf.img_size
# Reject sizes that do not tile the per-channel pixel count exactly.
config_assert(spp_conf.img_size * spp_conf.img_size_y == img_pixels,
"Incorrect input image size %d for input image pixels %d"
% (spp_conf.img_size, img_pixels))
def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels
image_pixels = g_layer_map[input_layer_name].size / image_conf.channels
......@@ -1756,6 +1783,25 @@ class PoolLayer(LayerBase):
name, pool_conf.output_y, pool_conf.output_x))
self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
# Config-parser class registered for layer type 'spp'.
# For each input it fills that input's spp_conf via parse_spp(), computes
# output_size = (4^pyramid_height - 1) / 3, prints it, and sets the layer size
# to output_size * channels.
# NOTE(review): indentation is not visible here, so it is unclear whether the
# statements after parse_spp() run once per input or once after the loop.
@config_layer('spp')
class SpatialPyramidPoolLayer(LayerBase):
def __init__(
self,
name,
inputs,
device=None):
# The base class is initialised with 0 as its third argument (presumably the
# layer size - confirm); the real size is set below via set_layer_size().
super(SpatialPyramidPoolLayer, self).__init__(name, 'spp', 0, inputs=inputs, device=device)
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
parse_spp(
self.inputs[input_index].spp,
input_layer.name,
self.config.inputs[input_index].spp_conf)
spp_conf = self.config.inputs[input_index].spp_conf
# Closed form of 1 + 4 + ... + 4^(pyramid_height - 1).
output_size = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1)
# Diagnostic output written to stdout while the config is parsed.
print("output size for %s is %d " % (name, output_size))
self.set_layer_size(output_size * spp_conf.channels)
@config_layer('batch_norm')
class BatchNormLayer(LayerBase):
layer_type = 'batch_norm'
......@@ -1903,6 +1949,7 @@ define_cost('SumOfSquaresCostLayer', 'square_error')
define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
define_cost('HuberTwoClass', 'huber')
define_cost('SumCost', 'sum_cost')
@config_layer('hsigmoid')
class HierarchicalSigmoidLayer(LayerBase):
......@@ -3015,7 +3062,7 @@ def Layer(
layer_func = layers.get(type)
config_assert(layer_func,
"layer type '%s' not supported." % type)
layer_func(name, **xargs)
return layer_func(name, **xargs)
@config_func
def ParameterHook(
......
......@@ -20,3 +20,6 @@ from layers import *
from networks import *
from optimizers import *
from attrs import *
# This will enable operator overload for LayerOutput
import math
......@@ -31,6 +31,7 @@ import copy
__all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
"identity_projection", "dotmul_projection", "dotmul_operator",
"repeat_layer",
"table_projection", "mixed_layer", "data_layer",
"embedding_layer", "fc_layer", "grumemory",
"pooling_layer", "lstmemory", "last_seq", "first_seq",
......@@ -52,10 +53,11 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer',
'nce_layer',
'cross_entropy_with_selfnorm', 'cross_entropy',
'multi_binary_label_cross_entropy',
'multi_binary_label_cross_entropy', 'sum_cost',
'rank_cost', 'lambda_cost', 'huber_cost',
'block_expand_layer',
'maxout_layer', 'out_prod_layer', 'print_layer'
'maxout_layer', 'out_prod_layer', 'print_layer',
'spp_layer',
]
......@@ -99,6 +101,7 @@ class LayerType(object):
SCALING_LAYER = 'scaling'
TRANS_LAYER = 'trans'
OUT_PROD_LAYER = 'out_prod'
FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'
MEMORY = 'memory'
MAXID_LAYER = 'maxid'
......@@ -113,6 +116,7 @@ class LayerType(object):
LINEAR_COMBINATION_LAYER = "convex_comb"
BLOCK_EXPAND = "blockexpand"
MAXOUT = "maxout"
SPP_LAYER = "spp"
PRINT_LAYER = "print"
......@@ -128,6 +132,7 @@ class LayerType(object):
CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm"
SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
SUM_COST = "sum_cost"
@staticmethod
def is_layer_type(type_name):
......@@ -181,6 +186,7 @@ class LayerOutput(object):
reverse=None):
assert isinstance(name, basestring)
assert isinstance(layer_type, basestring)
assert size is not None
assert LayerType.is_layer_type(layer_type)
self.name = name
self.layer_type = layer_type
......@@ -873,6 +879,7 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
size=input.size)
@wrap_bias_attr_default()
@wrap_param_attr_default()
@wrap_act_default(param_names=['gate_act'],
......@@ -1209,6 +1216,48 @@ def expand_layer(input, expand_as,
parents=[input, expand_as])
# Helper that declares a 'featmap_expand' layer (LayerType.FEATURE_MAP_EXPAND_LAYER)
# with the given input, passing num_repeats to Layer() as num_filters, and
# wraps the result in a LayerOutput whose parent is the input layer.
@wrap_name_default()
@layer_support()
def repeat_layer(input, num_repeats,
name=None,
layer_attr=None):
"""
A layer for repeating the input for num_repeats times. This is equivalent
to apply concat_layer() with num_repeats same input.
.. math::
y = [x, x, \cdots, x]
The example usage is:
.. code-block:: python
expand = repeat_layer(layer, 4)
:param input: Input layer
:type input: LayerOutput
:param num_repeats: Repeat the input so many times
:type num_repeats: int
:param name: Layer name.
:type name: basestring
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
"""
# Keep the object returned by Layer() so its computed size can be read below.
l = Layer(
inputs=[input.name],
name=name,
num_filters=num_repeats,
type=LayerType.FEATURE_MAP_EXPAND_LAYER,
**ExtraAttr.to_kwargs(layer_attr)
)
# The output size comes from the created layer's config rather than being
# recomputed here.
return LayerOutput(name=name,
size=l.config.size,
layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
parents=[input])
@wrap_name_default()
@layer_support()
def interpolation_layer(input, weight, name=None, layer_attr=None):
......@@ -1296,7 +1345,7 @@ def bilinear_interp_layer(input,
assert out_size_x > 0 and out_size_y > 0
assert input.num_filters is not None
num_channels = input.num_filters
Layer(name=name,
l = Layer(name=name,
inputs=Input(input.name,
bilinear_interp=BilinearInterp(out_size_x=out_size_x,
out_size_y=out_size_y,
......@@ -1304,7 +1353,7 @@ def bilinear_interp_layer(input,
type=LayerType.BILINEAR_INTERP_LAYER,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.BILINEAR_INTERP_LAYER, parents=[input],
num_filters=num_channels)
num_filters=num_channels, size=l.config.size)
@wrap_name_default()
@layer_support()
......@@ -1482,7 +1531,7 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
inputs=[a.name, b.name],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b])
return LayerOutput(name, LayerType.COSINE_SIM, parents=[a, b], size=size)
@wrap_name_default()
......@@ -1545,7 +1594,7 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
ipts_for_layer.append(label.name)
parents.append(label)
Layer(
l = Layer(
name=name,
type=LayerType.HSIGMOID,
num_classes=num_classes,
......@@ -1553,7 +1602,8 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
inputs=ipts_for_layer,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.HSIGMOID, parents=parents)
return LayerOutput(name, LayerType.HSIGMOID, parents=parents,
size=l.config.size)
@wrap_name_default("conv")
......@@ -1671,7 +1721,7 @@ def img_conv_layer(input, filter_size, num_filters,
lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER
Layer(
l = Layer(
name=name,
inputs=Input(input.name, conv=Conv(
filter_size=filter_size, padding=padding, stride=stride,
......@@ -1687,7 +1737,8 @@ def img_conv_layer(input, filter_size, num_filters,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, lt, parents=[input],
activation=act, num_filters=num_filters)
activation=act, num_filters=num_filters,
size=l.config.size)
@wrap_name_default("pool")
......@@ -1718,7 +1769,7 @@ def img_pool_layer(input, pool_size, name=None,
:type pool_size_y: int|None
:param num_channels: number of input channel.
:type num_channels: int
:param pool_type: pooling type. MaxPooling or AveragePooling. Default is
:param pool_type: pooling type. MaxPooling or AvgPooling. Default is
MaxPooling.
:type pool_type: BasePoolingType
:param stride: stride width of pooling.
......@@ -1750,7 +1801,7 @@ def img_pool_layer(input, pool_size, name=None,
stride_y = stride if stride_y is None else stride_y
padding_y = padding if padding_y is None else padding_y
Layer(
l = Layer(
name=name,
type=LayerType.POOL_LAYER,
inputs=[Input(input.name,
......@@ -1769,6 +1820,62 @@ def img_pool_layer(input, pool_size, name=None,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.POOL_LAYER, parents=[input],
num_filters=num_channels, size=l.config.size)
@wrap_name_default("spp")
@layer_support()
def spp_layer(input, name=None, num_channels=None, pool_type=None,
              pyramid_height=None, img_width=None, layer_attr=None):
    """
    Spatial Pyramid Pooling in Deep Convolutional Networks for Visual
    Recognition. For details please refer to
    `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.

    :param name: layer name.
    :type name: basestring
    :param input: layer's input.
    :type input: LayerOutput
    :param num_channels: number of input channels. If it is None,
                         input.num_filters is used (and must not be None).
    :type num_channels: int
    :param pool_type: Pooling type. MaxPooling or AvgPooling. Default is
                      MaxPooling.
    :type pool_type: BasePoolingType
    :param pyramid_height: pyramid height.
    :type pyramid_height: int
    :param img_width: the width of input feature map. If it is None, the
                      input feature map should be square.
    :type img_width: int|None
    :param layer_attr: Extra Layer Attribute.
    :type layer_attr: ExtraLayerAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    if num_channels is None:
        assert input.num_filters is not None
        num_channels = input.num_filters

    if pool_type is None:
        pool_type = MaxPooling()
    elif isinstance(pool_type, AvgPooling):
        # NOTE: this renames the caller's pooling object in place.
        pool_type.name = 'avg'

    # Max/avg pooling are configured under their '-projection' type names.
    type_name = pool_type.name
    if isinstance(pool_type, (AvgPooling, MaxPooling)):
        type_name += '-projection'

    l = Layer(
        name=name,
        type=LayerType.SPP_LAYER,
        inputs=Input(input.name,
                     spp=SpatialPyramidPool(pool_type=type_name,
                                            channels=num_channels,
                                            pyramid_height=pyramid_height,
                                            img_width=img_width)
                     ),
        **ExtraLayerAttribute.to_kwargs(layer_attr)
    )
    # Report the configured output size so that size-aware helpers (for
    # example the LayerOutput math operators) can work on the result.
    return LayerOutput(name, LayerType.SPP_LAYER, parents=[input],
                       num_filters=num_channels, size=l.config.size)
......@@ -1778,7 +1885,7 @@ def __img_norm_layer__(name, input, size, norm_type, scale, power,
assert input.num_filters is not None
num_channels = input.num_filters
Layer(
l = Layer(
name=name, type=LayerType.NORM_LAYER, inputs=Input(
input.name, norm=Norm(norm_type=norm_type,
channels=num_channels, size=size,
......@@ -1788,7 +1895,8 @@ def __img_norm_layer__(name, input, size, norm_type, scale, power,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, layer_type=LayerType.NORM_LAYER, parents=[input],
num_filters=num_channels, img_norm_type=norm_type)
num_filters=num_channels, img_norm_type=norm_type,
size=l.config.size)
@wrap_name_default("crmnorm")
......@@ -1913,7 +2021,7 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None,
num_channels = input.size
assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \
(batch_norm_type == "cudnn_batch_norm")
Layer(
l = Layer(
name=name,
inputs=Input(input.name,
image=Image(channels=num_channels),
......@@ -1929,7 +2037,8 @@ def batch_norm_layer(input, act=None, name=None, num_channels=None,
return LayerOutput(name=name, layer_type=LayerType.BATCH_NORM_LAYER,
parents=[input], activation=act,
num_filters=num_channels)
num_filters=num_channels,
size=l.config.size)
@wrap_name_default()
......@@ -2034,7 +2143,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
if each_input.num_filters is not None:
num_filters = each_input.num_filters
Layer(
l = Layer(
name=name, type=LayerType.ADDTO_LAYER, inputs=ipts_for_layer,
bias=ParamAttr.to_bias(bias_attr),
active_type=act.name,
......@@ -2042,7 +2151,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
)
return LayerOutput(name, LayerType.ADDTO_LAYER, parents=input,
activation=act, num_filters=num_filters)
activation=act, num_filters=num_filters,
size=l.config.size)
@wrap_act_default(act=IdentityActivation())
......@@ -2651,13 +2761,14 @@ def maxid_layer(input, name=None, layer_attr=None):
"""
assert isinstance(input, LayerOutput)
Layer(name=name,
l = Layer(name=name,
type='maxid',
inputs=[input.name],
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name=name,
layer_type=LayerType.MAXID_LAYER,
parents=[input])
parents=[input],
size=l.config.size)
@wrap_name_default()
......@@ -2686,13 +2797,14 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
assert isinstance(input1, LayerOutput)
assert isinstance(input2, LayerOutput)
Layer(name=name,
l = Layer(name=name,
type=LayerType.OUT_PROD_LAYER,
inputs=[input1.name, input2.name],
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name=name,
layer_type=LayerType.OUT_PROD_LAYER,
parents=[input1, input2])
parents=[input1, input2],
size=l.config.size)
@wrap_name_default()
......@@ -2721,13 +2833,14 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer(name=name,
l = Layer(name=name,
type=LayerType.EOSID_LAYER,
eos_id=eos_id,
inputs=[input.name],
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name=name, layer_type=LayerType.EOSID_LAYER,
parents=[input])
parents=[input],
size=l.config.size)
@wrap_name_default()
......@@ -2892,7 +3005,7 @@ def regression_cost(input, label, weight=None, name=None,
Layer(inputs=ipts, type="square_error", name=name,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.COST, parents=parents)
return LayerOutput(name, LayerType.COST, parents=parents, size=1)
@wrap_name_default("cost")
......@@ -2944,7 +3057,7 @@ def classification_cost(input, label, weight=None, name=None,
for each_evaluator in evaluator:
__add_evaluator__(each_evaluator)
return LayerOutput(name, LayerType.COST, parents=parents)
return LayerOutput(name, LayerType.COST, parents=parents, size=1)
def conv_operator(img, filter, filter_size, num_filters,
......@@ -3326,13 +3439,14 @@ def sampling_id_layer(input, name=None, layer_attr=None):
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer(
l = Layer(
name=name,
type=LayerType.SAMPLING_ID_LAYER,
inputs=[Input(input.name)],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.SAMPLING_ID_LAYER, input)
return LayerOutput(name, LayerType.SAMPLING_ID_LAYER, input,
size=l.config.size)
@wrap_name_default()
......@@ -3373,7 +3487,8 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0,
inputs=[Input(input.name)],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.SLOPE_INTERCEPT_LAYER, input)
return LayerOutput(name, LayerType.SLOPE_INTERCEPT_LAYER, input,
size=input.size)
@wrap_name_default()
......@@ -3512,7 +3627,7 @@ def block_expand_layer(input,
if num_channels is None:
assert input.num_filters is not None
num_channels = input.num_filters
Layer(name=name,
l = Layer(name=name,
inputs=Input(input.name,
block_expand=BlockExpand(channels=num_channels,
block_x=block_x,
......@@ -3525,7 +3640,8 @@ def block_expand_layer(input,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input])
return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input],
size=l.config.size)
@wrap_name_default()
......@@ -3586,13 +3702,14 @@ def maxout_layer(input,
assert input.num_filters is not None
num_channels = input.num_filters
assert num_channels % groups == 0
Layer(name=name,
l = Layer(name=name,
inputs=Input(input.name,
maxout=MaxOut(channels=num_channels,
groups=groups)),
type=LayerType.MAXOUT,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.MAXOUT, parents=[input])
return LayerOutput(name, LayerType.MAXOUT, parents=[input],
size=l.config.size)
@wrap_name_default()
......@@ -3718,7 +3835,10 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None,
parents = [input, label]
if weight is not None:
parents.append(weight)
return LayerOutput(name, LayerType.CRF_LAYER, parents, size=size)
# The size for LayerOutput means the dimension of the output.
# It's different from the meaning of crf layer, which is the number of
# classes.
return LayerOutput(name, LayerType.CRF_LAYER, parents, size=1)
@wrap_name_default()
......@@ -3766,7 +3886,10 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None,
parents = [input]
if label is not None:
parents.append(label)
return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=size)
# The size for LayerOutput means the dimension of the output.
# It's different from the meaning of crf layer, which is the number of
# classes.
return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1)
@wrap_bias_attr_default(has_bias=True)
@wrap_name_default()
......@@ -3834,7 +3957,7 @@ def nce_layer(input, label, num_classes, weight=None,
ipts_for_layer.append(weight.name)
parents.append(weight)
Layer(
l = Layer(
name=name,
type=LayerType.NCE_LAYER,
num_classes=num_classes,
......@@ -3844,7 +3967,8 @@ def nce_layer(input, label, num_classes, weight=None,
bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.NCE_LAYER, parents=parents)
return LayerOutput(name, LayerType.NCE_LAYER, parents=parents,
size=l.config.size)
"""
following are cost Layers.
......@@ -3919,7 +4043,7 @@ def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr=
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.RANK_COST, parents=parents)
return LayerOutput(name, LayerType.RANK_COST, parents=parents, size=1)
@wrap_name_default()
......@@ -3971,7 +4095,8 @@ def lambda_cost(input, score, name, NDCG_num=5, max_sort_size=-1, layer_attr=Non
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.LAMBDA_COST, parents=[input, score])
return LayerOutput(name, LayerType.LAMBDA_COST, parents=[input, score],
size=1)
@wrap_name_default()
......@@ -3982,14 +4107,13 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
.. code-block:: python
cost = cross_entropy(input, label)
cost = cross_entropy(input=input_layer,
label=label_layer)
:param input: The first input layer.
:type input: LayerOutput.
:param label: The input label.
:type label: LayerOutput.
:param type: The type of cost.
:type type: basestring.
:param name: The name of this layer. It is not necessary.
:type name: None|basestring.
:param coeff: The coefficient affects the gradient in the backward.
......@@ -4006,7 +4130,8 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label])
return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=[input, label],
size=1)
@wrap_name_default()
......@@ -4019,14 +4144,13 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
.. code-block:: python
cost = cross_entropy_with_selfnorm(input, label)
cost = cross_entropy_with_selfnorm(input=input_layer,
label=label_layer)
:param input: The first input layer.
:type input: LayerOutput.
:param label: The input label.
:type label: LayerOutput.
:param type: The type of cost.
:type type: basestring.
:param name: The name of this layer. It is not necessary.
:type name: None|basestring.
:param coeff: The coefficient affects the gradient in the backward.
......@@ -4048,7 +4172,39 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
return LayerOutput(name,
LayerType.CROSS_ENTROPY_WITH_SELFNORM,
parents=[input, label])
parents=[input, label], size=1)
@wrap_name_default()
@layer_support()
def sum_cost(input, name=None, layer_attr=None):
    """
    A loss layer which calculates the sum of its input as the loss.

    .. code-block:: python

       cost = sum_cost(input=input_layer)

    :param input: The input layer to be summed.
    :type input: LayerOutput.
    :param name: The name of this layer. It is not necessary.
    :type name: None|basestring.
    :param layer_attr: Extra Layer Attribute.
    :type layer_attr: ExtraLayerAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput.
    """
    assert isinstance(input, LayerOutput)

    source_names = [input.name]
    extra_kwargs = ExtraLayerAttribute.to_kwargs(layer_attr)
    Layer(name=name, type=LayerType.SUM_COST, inputs=source_names,
          **extra_kwargs)

    # The returned LayerOutput is declared with size 1.
    return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1)
@wrap_name_default()
......@@ -4059,7 +4215,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
.. code-block:: python
cost = huber_cost(input, label)
cost = huber_cost(input=input_layer,
label=label_layer)
:param input: The first input layer.
:type input: LayerOutput.
......@@ -4083,7 +4240,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.HUBER, parents=[input, label])
return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1)
@wrap_name_default()
......@@ -4095,7 +4252,8 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0,
.. code-block:: python
cost = multi_binary_label_cross_entropy(input, label)
cost = multi_binary_label_cross_entropy(input=input_layer,
label=label_layer)
:param input: The first input layer.
:type input: LayerOutput
......@@ -4126,4 +4284,4 @@ def multi_binary_label_cross_entropy(input, label, name=None, coeff=1.0,
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
return LayerOutput(name, LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
parents=[input, label])
parents=[input, label], size=1)
......@@ -13,10 +13,11 @@
# limitations under the License.
from .layers import LayerOutput, mixed_layer, identity_projection, \
slope_intercept_layer
slope_intercept_layer, scaling_layer, repeat_layer
from .attrs import is_compatible_with
from .default_decorators import *
import activations as act
from paddle.trainer.config_parser import logger
__all__ = []
......@@ -40,7 +41,21 @@ register_unary_math_op('square', act.SquareActivation())
def add(layeroutput, other):
    """
    Implement ``layeroutput + other``.

    :param layeroutput: left operand.
    :type layeroutput: LayerOutput
    :param other: a number, or a LayerOutput whose size equals
                  layeroutput.size, or where one of the two sizes is 1.
    :type other: float|int|LayerOutput
    :return: a slope_intercept_layer output when other is a number,
             otherwise a mixed_layer output summing both operands.
    :rtype: LayerOutput
    """
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
    # NOTE(review): the checks below rely on logger.fatal aborting the
    # config parsing -- confirm against paddle.trainer.config_parser.
    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be added with"
                     " another LayerOutput or a number")
    if layeroutput.size != other.size:
        if other.size != 1 and layeroutput.size != 1:
            logger.fatal("Two LayerOutput can be added only if they have equal size"
                         " or one of their sizes is 1. sizes are %s and %s" %
                         (layeroutput.size, other.size))
        if layeroutput.size == 1:
            # Keep the wider operand first; the size-1 operand is the one
            # that gets repeated.
            layeroutput, other = other, layeroutput
        other = repeat_layer(other, layeroutput.size)
    return mixed_layer(input=[identity_projection(input=layeroutput),
                              identity_projection(input=other)])
......@@ -50,10 +65,11 @@ LayerOutput.__add__ = add
def sub(layeroutput, other):
    """
    Implement ``layeroutput - other``.

    :param layeroutput: left operand.
    :type layeroutput: LayerOutput
    :param other: a number or another LayerOutput. LayerOutput operands
                  follow the same size rules as add().
    :type other: float|int|LayerOutput
    :return: LayerOutput representing the difference.
    :rtype: LayerOutput
    """
    if is_compatible_with(other, float):
        # Subtracting a constant is adding its negation as the intercept.
        return slope_intercept_layer(input=layeroutput, intercept=-other)
    # NOTE(review): relies on logger.fatal aborting the config parsing --
    # confirm against paddle.trainer.config_parser.
    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be subtracted with"
                     " another Layeroutput or a number")
    # a - b is computed as a + (-1 * b); delegating to add() reuses its
    # size checks and its handling of a size-1 operand.
    neg = slope_intercept_layer(input=other, slope=-1.0)
    return add(layeroutput, neg)
LayerOutput.__sub__ = sub
......@@ -62,3 +78,20 @@ def rsub(layeroutput, other):
return add(neg, other)
LayerOutput.__rsub__ = rsub
def mul(layeroutput, other):
    """
    Implement ``layeroutput * other`` (also installed as the reflected
    operator).

    :param layeroutput: the LayerOutput operand.
    :type layeroutput: LayerOutput
    :param other: a number, or a LayerOutput; in the latter case at least
                  one of the two operands must have size 1.
    :type other: float|int|LayerOutput
    :return: a slope_intercept_layer output for a numeric factor, otherwise
             a scaling_layer output whose weight is the size-1 operand.
    :rtype: LayerOutput
    """
    # Numeric factor: scale through the slope of a slope_intercept_layer.
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, slope=other)

    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be multiplied with"
                     " another Layeroutput or a number")
        return None

    # Layer * layer: whichever operand has size 1 becomes the weight.
    if layeroutput.size == 1:
        return scaling_layer(input=other, weight=layeroutput)
    if other.size == 1:
        return scaling_layer(input=layeroutput, weight=other)

    logger.fatal("At least one of the operand of '*' must be a number"
                 " or a LayerOutput with size=1")
LayerOutput.__mul__ = mul
LayerOutput.__rmul__ = mul
......@@ -11,8 +11,8 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_spilit_datasource)
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_split_datasource)
for conf in ${configs[*]}
......
......@@ -19,6 +19,12 @@ y = x + y
y = y - x
y = y - 2
y = 2 - y
y = 2 * y
y = y * 3
z= data_layer(name='data_2', size=1)
y = y * z
y = z * y
y = y + z
y = z + y
outputs(y)
......@@ -209,8 +209,129 @@ layers {
slope: 1.0
intercept: 2
}
layers {
name: "__slope_intercept_layer_6__"
type: "slope_intercept"
size: 100
active_type: ""
inputs {
input_layer_name: "__slope_intercept_layer_5__"
}
slope: 2
intercept: 0.0
}
layers {
name: "__slope_intercept_layer_7__"
type: "slope_intercept"
size: 100
active_type: ""
inputs {
input_layer_name: "__slope_intercept_layer_6__"
}
slope: 3
intercept: 0.0
}
layers {
name: "data_2"
type: "data"
size: 1
active_type: ""
}
layers {
name: "__scaling_layer_0__"
type: "scaling"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
inputs {
input_layer_name: "__slope_intercept_layer_7__"
}
}
layers {
name: "__scaling_layer_1__"
type: "scaling"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
inputs {
input_layer_name: "__scaling_layer_0__"
}
}
layers {
name: "__repeat_layer_0__"
type: "featmap_expand"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
num_filters: 100
}
layers {
name: "__mixed_2__"
type: "mixed"
size: 100
active_type: ""
inputs {
input_layer_name: "__scaling_layer_1__"
proj_conf {
type: "identity"
name: "___mixed_2__.w0"
input_size: 100
output_size: 100
}
}
inputs {
input_layer_name: "__repeat_layer_0__"
proj_conf {
type: "identity"
name: "___mixed_2__.w1"
input_size: 100
output_size: 100
}
}
}
layers {
name: "__repeat_layer_1__"
type: "featmap_expand"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
num_filters: 100
}
layers {
name: "__mixed_3__"
type: "mixed"
size: 100
active_type: ""
inputs {
input_layer_name: "__mixed_2__"
proj_conf {
type: "identity"
name: "___mixed_3__.w0"
input_size: 100
output_size: 100
}
}
inputs {
input_layer_name: "__repeat_layer_1__"
proj_conf {
type: "identity"
name: "___mixed_3__.w1"
input_size: 100
output_size: 100
}
}
}
input_layer_names: "data_2"
input_layer_names: "data"
output_layer_names: "__slope_intercept_layer_5__"
output_layer_names: "__mixed_3__"
sub_models {
name: "root"
layer_names: "data"
......@@ -228,8 +349,18 @@ sub_models {
layer_names: "__slope_intercept_layer_3__"
layer_names: "__slope_intercept_layer_4__"
layer_names: "__slope_intercept_layer_5__"
layer_names: "__slope_intercept_layer_6__"
layer_names: "__slope_intercept_layer_7__"
layer_names: "data_2"
layer_names: "__scaling_layer_0__"
layer_names: "__scaling_layer_1__"
layer_names: "__repeat_layer_0__"
layer_names: "__mixed_2__"
layer_names: "__repeat_layer_1__"
layer_names: "__mixed_3__"
input_layer_names: "data_2"
input_layer_names: "data"
output_layer_names: "__slope_intercept_layer_5__"
output_layer_names: "__mixed_3__"
is_recurrent_layer_group: false
}
......@@ -23,6 +23,17 @@ layers {
size: 10
active_type: ""
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 4
active_type: "tanh"
inputs {
input_layer_name: "input"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__ctc_layer_0__"
type: "ctc"
......@@ -36,17 +47,6 @@ layers {
}
norm_by_times: false
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 4
active_type: "tanh"
inputs {
input_layer_name: "input"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "crf_label"
type: "data"
......@@ -191,6 +191,16 @@ layers {
}
coeff: 1.0
}
layers {
name: "__sum_cost_0__"
type: "sum_cost"
size: 1
active_type: ""
inputs {
input_layer_name: "__fc_layer_0__"
}
coeff: 1.0
}
parameters {
name: "___fc_layer_0__.w0"
size: 800
......@@ -241,14 +251,15 @@ output_layer_names: "__cross_entropy_0__"
output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "labels"
layer_names: "probs"
layer_names: "xe-label"
layer_names: "__ctc_layer_0__"
layer_names: "__fc_layer_0__"
layer_names: "__ctc_layer_0__"
layer_names: "crf_label"
layer_names: "__crf_layer_0__"
layer_names: "left"
......@@ -264,6 +275,7 @@ sub_models {
layer_names: "huber_label"
layer_names: "__huber_cost_0__"
layer_names: "__multi_binary_label_cross_entropy_0__"
layer_names: "__sum_cost_0__"
input_layer_names: "input"
input_layer_names: "labels"
input_layer_names: "crf_label"
......@@ -284,6 +296,7 @@ sub_models {
output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 3200
active_type: ""
}
layers {
name: "__spp_0__"
type: "spp"
size: 80
active_type: ""
inputs {
input_layer_name: "data"
spp_conf {
pool_type: "max-projection"
pyramid_height: 2
channels: 16
img_size: 10
img_size_y: 20
}
}
}
input_layer_names: "data"
output_layer_names: "__spp_0__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__spp_0__"
input_layer_names: "data"
output_layer_names: "__spp_0__"
is_recurrent_layer_group: false
}
......@@ -11,8 +11,9 @@ labels = data_layer(name='labels', size=5000)
probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10)
hidden = fc_layer(input=seq_in, size=4)
outputs(ctc_layer(input=seq_in, label=labels),
crf_layer(input=fc_layer(input=seq_in, size=4),
crf_layer(input=hidden,
label=data_layer(name='crf_label', size=4)),
rank_cost(left=data_layer(name='left', size=1),
right=data_layer(name='right', size=1),
......@@ -23,4 +24,5 @@ outputs(ctc_layer(input=seq_in, label=labels),
cross_entropy_with_selfnorm(input=probs, label=xe_label),
huber_cost(input=data_layer(name='huber_probs', size=1),
label=data_layer(name='huber_label', size=1)),
multi_binary_label_cross_entropy(input=probs, label=xe_label))
multi_binary_label_cross_entropy(input=probs, label=xe_label),
sum_cost(input=hidden))
# Trainer config that builds a single spp_layer on top of a data layer and
# declares it as the network output.
from paddle.trainer_config_helpers import *
settings(
batch_size=100,
learning_rate=1e-5
)
# presumably 3200 = 16 channels x 10 (width) x 20 (height) -- TODO confirm
data = data_layer(name='data', size=3200)
# img_width is given explicitly, so the feature map need not be square.
spp = spp_layer(input=data,
pyramid_height=2,
num_channels=16,
pool_type=MaxPooling(),
img_width=10)
outputs(spp)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册