提交 ae7452f4 编写于 作者: Y Yu Yang

Merge branch 'develop' of github.com:baidu/Paddle into feature/fix_pydataprovider_multiple_obj_bugs

......@@ -465,6 +465,11 @@ SumOfSquaresCostLayer
.. doxygenclass:: paddle::SumOfSquaresCostLayer
:members:
SumCostLayer
`````````````````````
.. doxygenclass:: paddle::SumCostLayer
:members:
CosSimLayer
-----------
.. doxygenclass:: paddle::CosSimLayer
......
......@@ -46,6 +46,12 @@ conv_operator
:members: conv_operator
:noindex:
conv_projection
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: conv_projection
:noindex:
conv_shift_layer
------------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -73,6 +79,12 @@ img_pool_layer
:members: img_pool_layer
:noindex:
spp_layer
--------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: spp_layer
:noindex:
maxout_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -254,6 +266,12 @@ expand_layer
:members: expand_layer
:noindex:
repeat_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: repeat_layer
:noindex:
Math Layers
===========
......@@ -401,6 +419,12 @@ hsigmoid
:members: hsigmoid
:noindex:
sum_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: sum_cost
:noindex:
Check Layer
============
......
......@@ -91,6 +91,7 @@ extern void hl_expand_feature2col(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples.
*
*/
extern void hl_maxpool_forward(
......@@ -100,7 +101,8 @@ extern void hl_maxpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride);
/**
* @brief Maximum pool backward.
......@@ -123,6 +125,7 @@ extern void hl_maxpool_forward(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] targetGrad output grad.
* @param[in] outStride stride between output data samples.
*
*/
extern void hl_maxpool_backward(
......@@ -135,7 +138,7 @@ extern void hl_maxpool_backward(
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad);
real* targetGrad, const int outStride);
/**
* @brief Average pool forward.
......@@ -154,6 +157,7 @@ extern void hl_maxpool_backward(
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples.
*
*/
extern void hl_avgpool_forward(
......@@ -163,7 +167,8 @@ extern void hl_avgpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride);
/**
* @brief Average pool backward.
......@@ -184,6 +189,7 @@ extern void hl_avgpool_forward(
* @param[in] scaleA scale.
* @param[in] scaleB scale.
* @param[out] backGrad output grad.
* @param[in] outStride stride between output data samples.
*
*/
extern void hl_avgpool_backward(
......@@ -195,7 +201,7 @@ extern void hl_avgpool_backward(
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad);
real* backGrad, const int outStride);
/**
* @brief Cross-map-response normalize forward.
......
......@@ -44,7 +44,8 @@ inline void hl_maxpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {}
inline void hl_maxpool_backward(
const int frameCnt, const real* inputData,
......@@ -56,7 +57,7 @@ inline void hl_maxpool_backward(
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {}
real* targetGrad, const int outStride) {}
inline void hl_avgpool_forward(
const int frameCnt, const real* inputData,
......@@ -65,7 +66,8 @@ inline void hl_avgpool_forward(
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {}
inline void hl_avgpool_backward(
const int frameCnt, const real* outGrad,
......@@ -76,7 +78,7 @@ inline void hl_avgpool_backward(
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad) {}
real* backGrad, const int outStride) {}
inline void hl_CMRNorm_forward(
size_t frameCnt, const real* in, real* scale, real* out,
......
......@@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
const int ksizeW, const int ksizeH,
const int strideH, const int strideW,
const int offsetH, const int offsetW,
real* tgtData) {
real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
......@@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
maxval = inputData[h * width + w];
}
}
tgtData[index] = maxval;
int tgtIndex = index % (pooledW * pooledH * channels) +
frameNum * tgtStride;
tgtData[tgtIndex] = maxval;
}
}
......@@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real* tgtData) {
real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
......@@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels, height, width,
pooledH, pooledW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData);
paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_maxpool_forward failed");
}
......@@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
real* targetGrad) {
real* targetGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
// find out the local index
......@@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
real input = inputData[index];
outData += (frameNum * channels + offsetC) * pooledH * pooledW;
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
outData += (frameNum * outStride + offsetC * pooledH * pooledW);
outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
if (input == outData[ph * pooledW + pw]) {
......@@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {
real* targetGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
......@@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
targetGrad);
targetGrad, outStride);
CHECK_SYNC("hl_maxpool_backward");
}
......@@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int padH, const int padW,
real* tgtData) {
real* tgtData, const int tgtStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
......@@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
aveval += inputData[h * width + w];
}
}
tgtData[index] = aveval / pool_size;
int tgtIndex = index % (pooledW * pooledH * channels) +
frameNum * tgtStride;
tgtData[tgtIndex] = aveval / pool_size;
}
}
......@@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {
const int paddingH, const int paddingW,
real* tgtData, const int tgtStride) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels,
height, width, pooledH, pooledW,
sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData);
paddingH, paddingW, tgtData, tgtStride);
CHECK_SYNC("hl_avgpool_forward failed");
}
......@@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
real* tgtGrad) {
real* tgtGrad, const int outStride) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int offsetW = index % width + padW;
......@@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0;
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
......@@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* backGrad) {
real* backGrad, const int outStride) {
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
......@@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
backGrad);
backGrad, outStride);
CHECK_SYNC("hl_avgpool_backward failed");
}
......
......@@ -562,4 +562,39 @@ void HuberTwoClass::backwardImpIn(
}
}
/**
* This cost layer computes the sum of its input as loss.
* \f[
* o(i) = \sum_{j=1}^D y_{ij}
* \f]
*/
class SumCostLayer : public Layer {
public:
explicit SumCostLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
bool ret = Layer::init(layerMap, parameterMap);
if (!ret) return ret;
CHECK_EQ(inputLayers_.size(), 1UL);
return true;
}
virtual void forward(PassType passType) {
Layer::forward(passType);
const MatrixPtr& input = getInputValue(0);
/* malloc memory for the output_ if necessary */
int batchSize = input->getHeight();
int size = 1;
resizeOutput(batchSize, size);
output_.value->sumRows(*input);
}
virtual void backward(const UpdateCallback& callback = nullptr) {
getInputGrad(0)->add((real)1);
}
};
REGISTER_LAYER(sum_cost, SumCostLayer);
} // namespace paddle
......@@ -129,7 +129,7 @@ protected:
* This cost layer compute Euclidean (L2) loss for real-valued regression
* tasks.
* \f[
* L = \frac{1}{2N} \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
* L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2}
* \f]
*/
class SumOfSquaresCostLayer : public CostLayer {
......
......@@ -52,10 +52,8 @@ bool PoolLayer::init(const LayerMap& layerMap,
Layer* PoolLayer::create(const LayerConfig& config) {
CHECK_EQ(config.inputs_size(), 1);
const std::string& pool = config.inputs(0).pool_conf().pool_type();
if (pool == "max-projection") {
return new MaxPoolProjectionLayer(config);
} else if (pool == "avg-projection") {
return new AvgPoolProjectionLayer(config);
if (pool == "max-projection" || pool == "avg-projection") {
return new PoolProjectionLayer(config);
#ifndef PADDLE_ONLY_CPU
} else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config);
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PoolProjection.h"
namespace paddle {
REGISTER_PROJECTION_CREATE_FUNC(pool, &PoolProjection::create);
PoolProjection::PoolProjection(const ProjectionConfig& config,
                               ParameterPtr parameter, bool useGpu)
    : Projection(config, parameter, useGpu) {
  const PoolConfig& poolConf = config_.pool_conf();

  poolType_ = poolConf.pool_type();
  channels_ = poolConf.channels();

  // Horizontal (x) geometry is always read straight from the config.
  imgSize_ = poolConf.img_size();
  sizeX_ = poolConf.size_x();
  stride_ = poolConf.stride();
  confPadding_ = poolConf.padding();
  outputX_ = poolConf.output_x();

  // Vertical (y) geometry: every field falls back to its x counterpart
  // when the optional y value is absent from the config.
  if (poolConf.has_img_size_y()) {
    imgSizeY_ = poolConf.img_size_y();
  } else {
    imgSizeY_ = poolConf.img_size();
  }
  if (poolConf.has_size_y()) {
    sizeY_ = poolConf.size_y();
  } else {
    sizeY_ = poolConf.size_x();
  }
  if (poolConf.has_stride_y()) {
    strideY_ = poolConf.stride_y();
  } else {
    strideY_ = poolConf.stride();
  }
  if (poolConf.has_padding_y()) {
    confPaddingY_ = poolConf.padding_y();
  } else {
    confPaddingY_ = poolConf.padding();
  }
  if (poolConf.has_output_y()) {
    outputY_ = poolConf.output_y();
  } else {
    outputY_ = poolConf.output_x();
  }
}
size_t PoolProjection::getSize() {
imgSizeY_ = in_->getFrameHeight();
imgSize_ = in_->getFrameWidth();
const PoolConfig& conf = config_.pool_conf();
if (imgSizeY_ == 0) {
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
}
if (imgSize_ == 0) {
imgSize_ = conf.img_size();
}
outputY_ = outputSize(imgSizeY_, sizeY_, confPaddingY_, strideY_,
/* caffeMode */ false);
outputX_ = outputSize(imgSize_, sizeX_, confPadding_, stride_,
/* caffeMode */ false);
const_cast<Argument*>(out_)->setFrameHeight(outputY_);
const_cast<Argument*>(out_)->setFrameWidth(outputX_);
return outputY_ * outputX_ * channels_;
}
// Factory: picks the concrete pooling projection from pool_conf().pool_type().
// Unknown types are reported through LOG(FATAL).
PoolProjection* PoolProjection::create(const ProjectionConfig& config,
                                       ParameterPtr parameter, bool useGpu) {
  const std::string& poolType = config.pool_conf().pool_type();

  if (poolType == "max-projection") {
    return new MaxPoolProjection(config, parameter, useGpu);
  }
  if (poolType == "avg-projection") {
    return new AvgPoolProjection(config, parameter, useGpu);
  }

  LOG(FATAL) << "Unknown pool type: " << poolType;
  return nullptr;
}
void MaxPoolProjection::forward() {
size_t width = getSize();
CHECK_EQ(width, out_->value->getWidth());
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputY_, outputX_, confPaddingY_,
confPadding_);
}
void MaxPoolProjection::backward(const UpdateCallback& callback) {
(void)callback;
MatrixPtr outGrad = out_->grad;
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
MatrixPtr inputGrad = in_->grad;
if (NULL == inputGrad) {
return;
}
inputGrad->maxPoolBackward(*inputV, imgSizeY_, imgSize_, *outGrad, *outV,
sizeX_, sizeY_, strideY_, stride_, outputY_,
outputX_, 1, 1, confPaddingY_, confPadding_);
}
void AvgPoolProjection::forward() {
size_t width = getSize();
CHECK_EQ(width, out_->value->getWidth());
MatrixPtr inputV = in_->value;
MatrixPtr outV = out_->value;
outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputY_, outputX_, confPaddingY_,
confPadding_);
}
void AvgPoolProjection::backward(const UpdateCallback& callback) {
(void)callback;
MatrixPtr outputGrad = out_->grad;
MatrixPtr inputGrad = in_->grad;
if (NULL == inputGrad) {
return;
}
inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_, sizeX_, sizeY_,
strideY_, stride_, outputY_, outputX_, 1, 1,
confPaddingY_, confPadding_);
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Projection.h"
#include "paddle/math/MathUtils.h"
namespace paddle {
// Common base of the pooling projections. It stores the pooling geometry read
// from ProjectionConfig::pool_conf(); the forward()/backward() computation is
// supplied by the derived classes.
class PoolProjection : public Projection {
protected:
// Input image height / width. Re-read from the input argument in getSize().
size_t imgSizeY_, imgSize_;
// Pooled output height / width. Recomputed in getSize().
size_t outputY_, outputX_;
// Pooling stride along y / x.
size_t strideY_, stride_;
// Pooling window size along y / x.
size_t sizeY_, sizeX_;
// Configured padding along y / x.
int confPaddingY_, confPadding_;
// Number of channels from the pool config.
size_t channels_;
// Pool type string from the pool config.
std::string poolType_;
public:
// Copies the pool_conf() fields into the members above; each optional *_y
// field falls back to its x counterpart when absent.
PoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
bool useGpu);
// Factory. Returns a MaxPoolProjection for "max-projection" and an
// AvgPoolProjection for "avg-projection", both allocated with new.
// Any other pool type is reported with LOG(FATAL).
static PoolProjection* create(const ProjectionConfig& config,
ParameterPtr parameter, bool useGpu);
const std::string& getPoolType() const { return poolType_; }
// Recomputes the image/output dimensions, records the output frame
// height/width on out_, and returns outputY_ * outputX_ * channels_.
size_t getSize();
};
// Pooling projection whose forward()/backward() call
// Matrix::maxPoolForward / Matrix::maxPoolBackward.
class MaxPoolProjection : public PoolProjection {
public:
MaxPoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
bool useGpu)
: PoolProjection(config, parameter, useGpu) {}
// Checks that the output width equals getSize(), then pools in_->value into
// out_->value.
virtual void forward();
// Propagates out_->grad into in_->grad; does nothing if in_->grad is null.
// The callback argument is unused.
virtual void backward(const UpdateCallback& callback = nullptr);
};
// Pooling projection whose forward()/backward() call
// Matrix::avgPoolForward / Matrix::avgPoolBackward.
class AvgPoolProjection : public PoolProjection {
public:
AvgPoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
bool useGpu)
: PoolProjection(config, parameter, useGpu) {}
// Checks that the output width equals getSize(), then pools in_->value into
// out_->value.
virtual void forward();
// Propagates out_->grad into in_->grad; does nothing if in_->grad is null.
// The callback argument is unused.
virtual void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
......@@ -18,6 +18,7 @@ limitations under the License. */
namespace paddle {
size_t PoolProjectionLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
......@@ -37,74 +38,23 @@ size_t PoolProjectionLayer::getSize() {
layerSize = outputH_ * outputW_ * channels_;
getOutput().setFrameHeight(outputH_);
getOutput().setFrameWidth(outputW_);
return layerSize;
}
void MaxPoolProjectionLayer::forward(PassType passType) {
Layer::forward(passType);
/* malloc memory for the output_ if necessary */
/* note: one sample correspond to one ROW */
MatrixPtr input = getInputValue(0);
int batchSize = input->getHeight();
int size = getSize();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, confPaddingY_,
confPadding_);
}
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
/* Do derivation */
MatrixPtr outGrad = getOutputGrad();
MatrixPtr inputV = getInputValue(0);
MatrixPtr outV = getOutputValue();
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, sizeY_, strideY_, stride_, outputH_,
outputW_, 1, 1, confPaddingY_, confPadding_);
}
void AvgPoolProjectionLayer::forward(PassType passType) {
void PoolProjectionLayer::forward(PassType passType) {
Layer::forward(passType);
/* malloc memory for the output_ if necessary */
/* note: one sample correspond to one ROW */
MatrixPtr input = getInputValue(0);
int batchSize = input->getHeight();
const Argument& in = getInput(0);
int batchSize = in.value->getHeight();
int size = getSize();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, confPaddingY_,
confPadding_);
poolProjection_->forward(&in, &output_, passType);
}
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
void PoolProjectionLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
/* Do derivation */
MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
poolProjection_->backward(callback);
}
} // namespace paddle
......@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "PoolLayer.h"
#include "PoolProjection.h"
#include "paddle/math/Matrix.h"
#include <vector>
namespace paddle {
/**
......@@ -27,33 +27,18 @@ class PoolProjectionLayer : public PoolLayer {
protected:
size_t imgSizeH_, imgSizeW_;
size_t outputH_, outputW_;
std::unique_ptr<PoolProjection> poolProjection_;
ProjectionConfig projectionConfig_;
public:
size_t getSize();
explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {}
};
/**
* @brief A layer for max pooling
*/
class MaxPoolProjectionLayer : public PoolProjectionLayer {
public:
explicit MaxPoolProjectionLayer(const LayerConfig& config)
: PoolProjectionLayer(config) {}
~MaxPoolProjectionLayer() {}
explicit PoolProjectionLayer(const LayerConfig& config) : PoolLayer(config) {
PoolConfig* conf = projectionConfig_.mutable_pool_conf();
*conf = config_.inputs(0).pool_conf();
poolProjection_.reset(
PoolProjection::create(projectionConfig_, nullptr, useGpu_));
}
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
};
/**
* @brief A layer for average pooling
*/
class AvgPoolProjectionLayer : public PoolProjectionLayer {
public:
explicit AvgPoolProjectionLayer(const LayerConfig& config)
: PoolProjectionLayer(config) {}
~AvgPoolProjectionLayer() {}
size_t getSize();
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
......
......@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/parameter/Parameter.h"
#include "ModelConfig.pb.h"
#include "Layer.h"
#include "ModelConfig.pb.h"
#include "paddle/parameter/Parameter.h"
namespace paddle {
......@@ -28,6 +27,11 @@ namespace paddle {
Projection::registrar_.registerClass<__class_name>(#__type_name); \
})
/**
 * Registers a projection type through an explicit creation function.
 *
 * Expands to a static InitFunction object named __reg_type_<__type_name>
 * whose lambda calls Projection::registrar_.registerClass() with the
 * stringified type name and createFunction.
 *
 * @param __type_name    type identifier; stringified to form the lookup key.
 * @param createFunction function handed to the registrar for this type.
 */
#define REGISTER_PROJECTION_CREATE_FUNC(__type_name, createFunction) \
static InitFunction __reg_type_##__type_name([]() { \
Projection::registrar_.registerClass(#__type_name, createFunction); \
})
/**
* A projection takes one Argument as input, calculate the result and add it
* to output Argument.
......@@ -50,7 +54,8 @@ public:
registrar_;
/**
* Forward propagation. If backward() will be called, in and out must be kept valid until then.
* Forward propagation. If backward() will be called, in and out must be kept
* valid until then.
* @param in input of projection
* @param out output of projection
* @param passType PASS_TRAIN or PASS_TEST
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "SpatialPyramidPoolLayer.h"
namespace paddle {
REGISTER_LAYER(spp, SpatialPyramidPoolLayer);
// Builds the "pool" ProjectionConfig for one pyramid level.
// The pooling window equals the stride on each axis, so windows do not
// overlap.
ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
                                                    size_t imgSizeH,
                                                    size_t channels,
                                                    size_t pyramidLevel,
                                                    std::string& poolType) {
  // Level k divides each image axis into 2^k bins.
  int bins = std::pow(2, pyramidLevel);

  // Height axis: window = ceil(height / bins); padding is half of the
  // overshoot (window * bins - height), rounded up.
  int winH = std::ceil(imgSizeH / static_cast<double>(bins));
  int padH = (winH * bins - imgSizeH + 1) / 2;
  int outH = outputSize(imgSizeH, winH, padH, winH, true);

  // Width axis: same computation.
  int winW = std::ceil(imgSizeW / static_cast<double>(bins));
  int padW = (winW * bins - imgSizeW + 1) / 2;
  int outW = outputSize(imgSizeW, winW, padW, winW, true);

  ProjectionConfig projConfig;
  projConfig.set_type("pool");
  projConfig.set_output_size(outH * outW * channels);

  PoolConfig* poolConf = projConfig.mutable_pool_conf();
  poolConf->set_pool_type(poolType);
  poolConf->set_channels(channels);

  poolConf->set_img_size(imgSizeW);
  poolConf->set_size_x(winW);
  poolConf->set_stride(winW);
  poolConf->set_padding(padW);
  poolConf->set_output_x(outW);

  poolConf->set_img_size_y(imgSizeH);
  poolConf->set_size_y(winH);
  poolConf->set_stride_y(winH);
  poolConf->set_padding_y(padH);
  poolConf->set_output_y(outH);

  return projConfig;
}
// Refreshes imgSizeH_/imgSizeW_ from the input layer's output (falling back to
// the spp config when the input reports 0) and returns the layer width:
// channels_ * (4^pyramidHeight_ - 1) / 3, i.e. channels_ times the total
// number of bins (1 + 4 + 16 + ...) over all pyramid levels.
size_t SpatialPyramidPoolLayer::getSize() {
  CHECK_EQ(inputLayers_.size(), 1UL);
  const SppConfig& sppConf = config_.inputs(0).spp_conf();

  imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
  imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();

  // Resolve the width first: the height fallback below may reuse it, and it
  // must not still be 0 at that point.
  if (imgSizeW_ == 0) {
    imgSizeW_ = sppConf.img_size();
  }
  if (imgSizeH_ == 0) {
    imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
  }

  size_t outputH = 1;
  size_t outputW = (std::pow(4, pyramidHeight_) - 1) / (4 - 1);

  return outputH * outputW * channels_;
}
// Reads the spp config of the single input and creates one PoolProjection per
// pyramid level. projCol_[i] records the [start, end) output column range of
// level i; the levels are laid out back to back.
// Returns false if the base-class initialisation fails.
bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
                                   const ParameterMap& parameterMap) {
  // Propagate a base-class initialisation failure instead of ignoring it.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }
  CHECK_EQ(config_.inputs_size(), 1);

  const SppConfig& sppConf = config_.inputs(0).spp_conf();
  pyramidHeight_ = sppConf.pyramid_height();
  poolType_ = sppConf.pool_type();
  channels_ = sppConf.channels();
  imgSizeW_ = sppConf.img_size();
  // A missing img_size_y means the height equals the width.
  imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;

  poolProjections_.reserve(pyramidHeight_);
  projCol_.reserve(pyramidHeight_);
  projOutput_.resize(pyramidHeight_);

  size_t startCol = 0;
  size_t endCol = 0;
  for (size_t i = 0; i < pyramidHeight_; i++) {
    poolProjections_.emplace_back(PoolProjection::create(
        getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_), nullptr,
        useGpu_));
    endCol += poolProjections_[i]->getOutputSize();
    projCol_.push_back(std::make_pair(startCol, endCol));
    startCol = endCol;
  }

  // The per-level widths must add up to the closed-form layer size.
  CHECK_EQ(endCol, getSize());
  return true;
}
// Runs every pyramid level's pooling projection on the single input. Each
// level writes into its own column range of output_ through a sub-column view.
void SpatialPyramidPoolLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  resetOutput(batchSize, getSize());

  for (size_t i = 0; i < pyramidHeight_; i++) {
    size_t startCol = projCol_[i].first;
    size_t endCol = projCol_[i].second;
    projOutput_[i].value = output_.value->subColMatrix(startCol, endCol);
    // NOTE(review): output_.grad is presumably not allocated in every pass
    // (e.g. inference) -- confirm against resetOutput(). Guard the
    // dereference and drop any view left over from an earlier pass.
    if (output_.grad) {
      projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol);
    } else {
      projOutput_[i].grad = nullptr;
    }
  }

  for (size_t i = 0; i < pyramidHeight_; i++) {
    poolProjections_[i]->forward(&getInput(0), &projOutput_[i], passType);
  }
}
void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) {
for (size_t i = 0; i < pyramidHeight_; i++) {
if (poolProjections_[i]) {
poolProjections_[i]->backward(callback);
}
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "PoolProjection.h"
#include "paddle/math/MathUtils.h"
#include "paddle/utils/Logging.h"
namespace paddle {
/**
* @brief A layer for spatial pyramid pooling on the input image by taking
* the max, average, etc. within regions, so that the result vector of
* different sized images are of the same size.
*
* The config file api is spp_layer.
*/
class SpatialPyramidPoolLayer : public Layer {
protected:
// Number of channels, from the spp config.
size_t channels_;
// Input image width / height; refreshed from the input layer in getSize().
size_t imgSizeW_;
size_t imgSizeH_;
// Number of pyramid levels; one pooling projection is created per level.
size_t pyramidHeight_;
// Pool type string passed on to every level's projection config.
std::string poolType_;
// One pooling projection per pyramid level, created in init().
std::vector<std::unique_ptr<PoolProjection>> poolProjections_;
// Per-level output arguments; forward() points them at sub-column views of
// output_.
std::vector<Argument> projOutput_;
// Per-level [start, end) column range inside the layer output.
std::vector<std::pair<size_t, size_t>> projCol_;
public:
explicit SpatialPyramidPoolLayer(const LayerConfig& config) : Layer(config) {}
~SpatialPyramidPoolLayer() {}
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
// Builds the "pool" ProjectionConfig for one pyramid level.
// NOTE(review): the definition names these parameters imgSizeW, imgSizeH,
// channels, pyramidLevel and poolType; the names used here differ and look
// like member names, but they are ordinary parameters.
ProjectionConfig getConfig(size_t sizeX_, size_t sizeY_, size_t channels,
size_t pyamidLevel_, std::string& poolType_);
// Returns the layer output width: channels_ * (4^pyramidHeight_ - 1) / 3.
size_t getSize();
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
......@@ -13,15 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include "paddle/gserver/layers/DataLayer.h"
#include <vector>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/math/MathUtils.h"
#include "TestUtil.h"
#include "LayerGradUtil.h"
#include "TestUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
......@@ -981,6 +981,32 @@ TEST(Layer, PoolLayer) {
#endif
}
void testSppLayer(const string& poolType, const int pyramidHeight, bool trans,
bool useGpu) {
TestConfig config;
config.layerConfig.set_type("spp");
config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
SppConfig* sppConfig = input->mutable_spp_conf();
sppConfig->set_pool_type(poolType);
sppConfig->set_pyramid_height(pyramidHeight);
sppConfig->set_channels(16);
sppConfig->set_img_size(10);
sppConfig->set_img_size_y(20);
int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1);
config.layerConfig.set_size(outputSize * sppConfig->channels());
testLayerGrad(config, "spp", 100, trans, useGpu);
}
// Covers both pool types for pyramid heights 1 to 3, first without and then
// with useGpu.
TEST(Layer, SpatialPyramidPoolLayer) {
  for (auto useGpu : {false, true}) {
    for (auto pyramidHeight : {1, 2, 3}) {
      for (const char* poolType : {"avg-projection", "max-projection"}) {
        testSppLayer(poolType, pyramidHeight, false, useGpu);
      }
    }
  }
}
TEST(Layer, rankCostLayer) {
TestConfig config;
config.layerConfig.set_type("rank-cost");
......@@ -998,6 +1024,19 @@ TEST(Layer, rankCostLayer) {
}
}
// Gradient-checks the "sum_cost" layer on a single width-1 data input,
// first without and then with useGpu.
TEST(Layer, sumCostLayer) {
  TestConfig config;
  config.biasSize = 0;
  config.layerConfig.set_type("sum_cost");
  config.layerConfig.add_inputs();
  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});

  const bool gpuModes[] = {false, true};
  for (bool useGpu : gpuModes) {
    testLayerGrad(config, "sum_cost", 100, false, useGpu);
  }
}
TEST(Layer, weightedRankCostLayer) {
TestConfig config;
config.layerConfig.set_type("rank-cost");
......
此差异已折叠。
......@@ -378,7 +378,7 @@ hl_activation_mode_t hlActiveType(const std::string& type) {
return HL_ACTIVATION_RELU;
} else if (type == "tanh") {
return HL_ACTIVATION_TANH;
} else if (type == "linear") {
} else if (type == "linear" || type == "") {
return HL_ACTIVATION_LINEAR;
} else {
LOG(FATAL) << "Do not support activation type " << type;
......
......@@ -120,6 +120,14 @@ message PoolConfig {
optional uint32 padding_y = 13 [default = 0];
}
// Per-input configuration of the spatial pyramid pooling ("spp") layer.
message SppConfig {
// Pooling type; the config parser accepts 'max-projection' or
// 'avg-projection'.
required string pool_type = 1;
// Number of pyramid levels; the layer creates one pooling projection per
// level.
required uint32 pyramid_height = 2;
// Number of input channels.
required uint32 channels = 3;
// Input image width.
required uint32 img_size = 4;
// Input image height; the layer uses img_size when this is absent.
optional uint32 img_size_y = 5;
}
message NormConfig {
// rnorm or cmrnorm
required string norm_type = 1;
......@@ -196,6 +204,9 @@ message ProjectionConfig {
// For IdentityOffsetProjection
optional uint64 offset = 11 [default = 0];
// For pool
optional PoolConfig pool_conf = 12;
}
message OperatorConfig {
......@@ -245,6 +256,7 @@ message LayerInputConfig {
optional string input_layer_argument = 9;
optional BilinearInterpConfig bilinear_interp_conf = 10;
optional MaxOutConfig maxout_conf = 11;
optional SppConfig spp_conf = 12;
}
message LayerConfig {
......
......@@ -218,7 +218,7 @@ def Inputs(*args):
@config_func
def HasInputsSet():
    """Return True when g_current_submodel already lists input layers.

    The previous text carried two consecutive ``return`` statements, which
    left this one unreachable; only the g_current_submodel check is kept.
    """
    return len(g_current_submodel.input_layer_names) != 0
# Define the name of the output layers of the NeuralNetwork.
......@@ -471,6 +471,7 @@ class Input(Cfg):
image=None,
block_expand=None,
maxout=None,
spp=None,
format=None,
nnz=None,
is_static=None,
......@@ -671,7 +672,6 @@ class ConvProjection(Projection):
def calc_parameter_dims(self, input_size, output_size):
return None
# Define a operator for mixed layer
@config_class
class Operator(Cfg):
......@@ -796,6 +796,17 @@ class Pool(Cfg):
padding_y = None):
self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto
@config_class
class SpatialPyramidPool(Cfg):
    """Settings for one input of the 'spp' layer (see parse_spp).

    pool_type:      'max-projection' or 'avg-projection'.
    pyramid_height: height of the pooling pyramid.
    channels:       number of channels of the input image.
    img_width:      optional image width; when None, parse_spp falls back to
                    the square root of the per-channel pixel count.
    """

    def __init__(
            self,
            pool_type,
            pyramid_height,
            channels,
            img_width = None):
        # locals() is handed over as-is, so no extra local names may be
        # introduced in this method.
        self.add_keys(locals())
# please refer to the comments in proto/ModelConfig.proto
@config_class
class Norm(Cfg):
......@@ -1081,6 +1092,22 @@ def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
pool_conf.padding_y, pool_conf.stride_y, False)
def parse_spp(spp, input_layer_name, spp_conf):
    """Populate the SppConfig message ``spp_conf`` from the ``spp`` settings.

    :param spp: settings object exposing pool_type, pyramid_height, channels
                and img_width.
    :param input_layer_name: name of the input layer; its size is looked up
                             in g_layer_map.
    :param spp_conf: message to fill in (pool_type, pyramid_height, channels,
                     img_size, img_size_y).
    """
    supported_pool_types = ['max-projection', 'avg-projection']
    spp_conf.pool_type = spp.pool_type
    config_assert(
        spp.pool_type in supported_pool_types,
        "pool-type %s is not in ['max-projection', 'avg-projection']" %
        spp.pool_type)

    spp_conf.pyramid_height = spp.pyramid_height
    spp_conf.channels = spp.channels

    # Per-channel pixel count of the input layer.
    input_size = g_layer_map[input_layer_name].size
    img_pixels = input_size / spp_conf.channels

    # img_size holds the width; the height is whatever remains of the
    # per-channel pixel count.
    width = default(spp.img_width, int(img_pixels ** 0.5))
    spp_conf.img_size = width
    spp_conf.img_size_y = img_pixels / spp_conf.img_size

    # Width times height has to reproduce the per-channel pixel count exactly.
    size_is_consistent = spp_conf.img_size * spp_conf.img_size_y == img_pixels
    config_assert(
        size_is_consistent,
        "Incorrect input image size %d for input image pixels %d" %
        (spp_conf.img_size, img_pixels))
def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels
image_pixels = g_layer_map[input_layer_name].size / image_conf.channels
......@@ -1756,6 +1783,25 @@ class PoolLayer(LayerBase):
name, pool_conf.output_y, pool_conf.output_x))
self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
@config_layer('spp')
class SpatialPyramidPoolLayer(LayerBase):
    """Config-side definition of the 'spp' layer type.

    For each input, the spp settings are parsed into that input's spp_conf
    and the layer size is set to ((4^pyramid_height - 1) / 3) * channels.
    """

    def __init__(self, name, inputs, device=None):
        super(SpatialPyramidPoolLayer, self).__init__(
            name, 'spp', 0, inputs=inputs, device=device)
        for idx in xrange(len(self.inputs)):
            source_layer = self.get_input_layer(idx)
            spp_conf = self.config.inputs[idx].spp_conf
            parse_spp(self.inputs[idx].spp, source_layer.name, spp_conf)

            # (4^h - 1) / 3 equals 1 + 4 + ... + 4^(h-1).
            bins_per_channel = (4 ** spp_conf.pyramid_height - 1) / 3
            print("output size for %s is %d " % (name, bins_per_channel))
            self.set_layer_size(bins_per_channel * spp_conf.channels)
@config_layer('batch_norm')
class BatchNormLayer(LayerBase):
layer_type = 'batch_norm'
......@@ -1903,6 +1949,7 @@ define_cost('SumOfSquaresCostLayer', 'square_error')
define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
define_cost('HuberTwoClass', 'huber')
define_cost('SumCost', 'sum_cost')
@config_layer('hsigmoid')
class HierarchicalSigmoidLayer(LayerBase):
......@@ -3015,7 +3062,7 @@ def Layer(
layer_func = layers.get(type)
config_assert(layer_func,
"layer type '%s' not supported." % type)
layer_func(name, **xargs)
return layer_func(name, **xargs)
@config_func
def ParameterHook(
......
......@@ -20,3 +20,6 @@ from layers import *
from networks import *
from optimizers import *
from attrs import *
# This will enable operator overload for LayerOutput
import math
......@@ -13,10 +13,11 @@
# limitations under the License.
from .layers import LayerOutput, mixed_layer, identity_projection, \
slope_intercept_layer
slope_intercept_layer, scaling_layer, repeat_layer
from .attrs import is_compatible_with
from .default_decorators import *
import activations as act
from paddle.trainer.config_parser import logger
__all__ = []
......@@ -40,7 +41,21 @@ register_unary_math_op('square', act.SquareActivation())
def add(layeroutput, other):
    """Implement ``layeroutput + other`` for LayerOutput.

    * ``other`` is a number: returns a slope_intercept_layer with
      ``intercept=other``.
    * ``other`` is a LayerOutput of the same size: returns a mixed_layer of
      two identity projections.
    * exactly one operand has size 1: that operand is expanded with
      repeat_layer to the size of the other before the two are summed.

    Unsupported operands and incompatible sizes are reported through
    logger.fatal.
    """
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
    # The descriptive check below supersedes the former bare
    # `assert isinstance(other, LayerOutput)`, which pre-empted it.
    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be added with"
                     " another LayerOutput or a number")
    if layeroutput.size != other.size:
        if other.size != 1 and layeroutput.size != 1:
            logger.fatal("Two LayerOutput can be added only if they have equal size"
                         " or one of their sizes is 1. sizes are %s and %s" %
                         (layeroutput.size, other.size))
        elif layeroutput.size == 1:
            # Keep the size-1 operand in `other` so it is the one repeated.
            layeroutput, other = other, layeroutput
        other = repeat_layer(other, layeroutput.size)
    return mixed_layer(input=[identity_projection(input=layeroutput),
                              identity_projection(input=other)])
......@@ -50,10 +65,11 @@ LayerOutput.__add__ = add
def sub(layeroutput, other):
    """Implement ``layeroutput - other`` for LayerOutput.

    * ``other`` is a number: returns a slope_intercept_layer whose intercept
      is ``-other``.
    * ``other`` is a LayerOutput: it is negated with a slope of -1.0 and the
      result is passed to ``add``, so the size rules of ``add`` apply.

    Unsupported operands are reported through logger.fatal.
    """
    if is_compatible_with(other, float):
        # The intercept is added to the input, so subtracting a constant
        # needs its negation; passing `other` unchanged would compute
        # layeroutput + other.
        return slope_intercept_layer(input=layeroutput, intercept=-other)
    if not isinstance(other, LayerOutput):
        logger.fatal("LayerOutput can only be subtracted with"
                     " another Layeroutput or a number")
    neg = slope_intercept_layer(input=other, slope=-1.0)
    return add(layeroutput, neg)


LayerOutput.__sub__ = sub
......@@ -62,3 +78,20 @@ def rsub(layeroutput, other):
return add(neg, other)
LayerOutput.__rsub__ = rsub
def mul(layeroutput, other):
    """Implement ``layeroutput * other`` (also bound as the reflected form).

    * ``other`` is a number: returns a slope_intercept_layer with
      ``slope=other``.
    * ``other`` is a LayerOutput and one operand has size 1: returns a
      scaling_layer with the size-1 operand as ``weight``.

    Every other combination is reported through logger.fatal.
    """
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, slope=other)

    if isinstance(other, LayerOutput):
        if layeroutput.size == 1:
            return scaling_layer(input=other, weight=layeroutput)
        if other.size == 1:
            return scaling_layer(input=layeroutput, weight=other)
        logger.fatal("At least one of the operand of '*' must be a number"
                     " or a LayerOutput with size=1")
    else:
        logger.fatal("LayerOutput can only be multiplied with"
                     " another Layeroutput or a number")


# The same function serves both operand orders.
LayerOutput.__mul__ = mul
LayerOutput.__rmul__ = mul
......@@ -11,8 +11,8 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_spilit_datasource)
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_split_datasource)
for conf in ${configs[*]}
......
......@@ -19,6 +19,12 @@ y = x + y
y = y - x
y = y - 2
y = 2 - y
y = 2 * y
y = y * 3
z= data_layer(name='data_2', size=1)
y = y * z
y = z * y
y = y + z
y = z + y
outputs(y)
......@@ -209,8 +209,129 @@ layers {
slope: 1.0
intercept: 2
}
layers {
name: "__slope_intercept_layer_6__"
type: "slope_intercept"
size: 100
active_type: ""
inputs {
input_layer_name: "__slope_intercept_layer_5__"
}
slope: 2
intercept: 0.0
}
layers {
name: "__slope_intercept_layer_7__"
type: "slope_intercept"
size: 100
active_type: ""
inputs {
input_layer_name: "__slope_intercept_layer_6__"
}
slope: 3
intercept: 0.0
}
layers {
name: "data_2"
type: "data"
size: 1
active_type: ""
}
layers {
name: "__scaling_layer_0__"
type: "scaling"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
inputs {
input_layer_name: "__slope_intercept_layer_7__"
}
}
layers {
name: "__scaling_layer_1__"
type: "scaling"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
inputs {
input_layer_name: "__scaling_layer_0__"
}
}
layers {
name: "__repeat_layer_0__"
type: "featmap_expand"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
num_filters: 100
}
layers {
name: "__mixed_2__"
type: "mixed"
size: 100
active_type: ""
inputs {
input_layer_name: "__scaling_layer_1__"
proj_conf {
type: "identity"
name: "___mixed_2__.w0"
input_size: 100
output_size: 100
}
}
inputs {
input_layer_name: "__repeat_layer_0__"
proj_conf {
type: "identity"
name: "___mixed_2__.w1"
input_size: 100
output_size: 100
}
}
}
layers {
name: "__repeat_layer_1__"
type: "featmap_expand"
size: 100
active_type: ""
inputs {
input_layer_name: "data_2"
}
num_filters: 100
}
layers {
name: "__mixed_3__"
type: "mixed"
size: 100
active_type: ""
inputs {
input_layer_name: "__mixed_2__"
proj_conf {
type: "identity"
name: "___mixed_3__.w0"
input_size: 100
output_size: 100
}
}
inputs {
input_layer_name: "__repeat_layer_1__"
proj_conf {
type: "identity"
name: "___mixed_3__.w1"
input_size: 100
output_size: 100
}
}
}
input_layer_names: "data_2"
input_layer_names: "data"
output_layer_names: "__slope_intercept_layer_5__"
output_layer_names: "__mixed_3__"
sub_models {
name: "root"
layer_names: "data"
......@@ -228,8 +349,18 @@ sub_models {
layer_names: "__slope_intercept_layer_3__"
layer_names: "__slope_intercept_layer_4__"
layer_names: "__slope_intercept_layer_5__"
layer_names: "__slope_intercept_layer_6__"
layer_names: "__slope_intercept_layer_7__"
layer_names: "data_2"
layer_names: "__scaling_layer_0__"
layer_names: "__scaling_layer_1__"
layer_names: "__repeat_layer_0__"
layer_names: "__mixed_2__"
layer_names: "__repeat_layer_1__"
layer_names: "__mixed_3__"
input_layer_names: "data_2"
input_layer_names: "data"
output_layer_names: "__slope_intercept_layer_5__"
output_layer_names: "__mixed_3__"
is_recurrent_layer_group: false
}
......@@ -23,6 +23,17 @@ layers {
size: 10
active_type: ""
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 4
active_type: "tanh"
inputs {
input_layer_name: "input"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__ctc_layer_0__"
type: "ctc"
......@@ -36,17 +47,6 @@ layers {
}
norm_by_times: false
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 4
active_type: "tanh"
inputs {
input_layer_name: "input"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "crf_label"
type: "data"
......@@ -191,6 +191,16 @@ layers {
}
coeff: 1.0
}
layers {
name: "__sum_cost_0__"
type: "sum_cost"
size: 1
active_type: ""
inputs {
input_layer_name: "__fc_layer_0__"
}
coeff: 1.0
}
parameters {
name: "___fc_layer_0__.w0"
size: 800
......@@ -241,14 +251,15 @@ output_layer_names: "__cross_entropy_0__"
output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "labels"
layer_names: "probs"
layer_names: "xe-label"
layer_names: "__ctc_layer_0__"
layer_names: "__fc_layer_0__"
layer_names: "__ctc_layer_0__"
layer_names: "crf_label"
layer_names: "__crf_layer_0__"
layer_names: "left"
......@@ -264,6 +275,7 @@ sub_models {
layer_names: "huber_label"
layer_names: "__huber_cost_0__"
layer_names: "__multi_binary_label_cross_entropy_0__"
layer_names: "__sum_cost_0__"
input_layer_names: "input"
input_layer_names: "labels"
input_layer_names: "crf_label"
......@@ -284,6 +296,7 @@ sub_models {
output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 3200
active_type: ""
}
layers {
name: "__spp_0__"
type: "spp"
size: 80
active_type: ""
inputs {
input_layer_name: "data"
spp_conf {
pool_type: "max-projection"
pyramid_height: 2
channels: 16
img_size: 10
img_size_y: 20
}
}
}
input_layer_names: "data"
output_layer_names: "__spp_0__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__spp_0__"
input_layer_names: "data"
output_layer_names: "__spp_0__"
is_recurrent_layer_group: false
}
......@@ -11,8 +11,9 @@ labels = data_layer(name='labels', size=5000)
probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10)
hidden = fc_layer(input=seq_in, size=4)
outputs(ctc_layer(input=seq_in, label=labels),
crf_layer(input=fc_layer(input=seq_in, size=4),
crf_layer(input=hidden,
label=data_layer(name='crf_label', size=4)),
rank_cost(left=data_layer(name='left', size=1),
right=data_layer(name='right', size=1),
......@@ -23,4 +24,5 @@ outputs(ctc_layer(input=seq_in, label=labels),
cross_entropy_with_selfnorm(input=probs, label=xe_label),
huber_cost(input=data_layer(name='huber_probs', size=1),
label=data_layer(name='huber_label', size=1)),
multi_binary_label_cross_entropy(input=probs, label=xe_label))
multi_binary_label_cross_entropy(input=probs, label=xe_label),
sum_cost(input=hidden))
from paddle.trainer_config_helpers import *
# Minimal network config exercising spp_layer with max pooling.
settings(
    batch_size=100,
    learning_rate=1e-5
)

# 3200 values = 16 channels x 200 pixels per channel; with img_width=10 the
# height works out to 200 / 10 = 20.
data = data_layer(name='data', size=3200)

spp = spp_layer(input=data,
                pyramid_height=2,
                num_channels=16,
                pool_type=MaxPooling(),
                img_width=10)

outputs(spp)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册