提交 3dd8c9be 编写于 作者: L luotao1 提交者: qingqing01

add maxout layer, including interface and unittest (#229)

* add maxout layer, including interface and unittest

* follow maxout comments

* auto setting channels

* fix unittest bug in test_RecurrentGradientMachine
上级 e83950b0
...@@ -73,6 +73,12 @@ img_pool_layer ...@@ -73,6 +73,12 @@ img_pool_layer
:members: img_pool_layer :members: img_pool_layer
:noindex: :noindex:
maxout_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: maxout_layer
:noindex:
Norm Layer Norm Layer
========== ==========
......
...@@ -169,7 +169,7 @@ extern void hl_avgpool_forward( ...@@ -169,7 +169,7 @@ extern void hl_avgpool_forward(
* @brief Maximum pool backward. * @brief Maximum pool backward.
* *
* @param[in] frameCnt batch size of input image. * @param[in] frameCnt batch size of input image.
* @param[in] outGrad input data. * @param[in] outGrad output grad data.
* @param[in] channels number of channel. * @param[in] channels number of channel.
* @param[in] height image height. * @param[in] height image height.
* @param[in] width image width. * @param[in] width image width.
...@@ -240,4 +240,34 @@ extern void hl_CMRNorm_backward( ...@@ -240,4 +240,34 @@ extern void hl_CMRNorm_backward(
size_t channels, size_t height, size_t width, size_t sizeX, size_t channels, size_t height, size_t width, size_t sizeX,
real alpha, real beta); real alpha, real beta);
/**
 * @brief MaxOut forward.
 *
 * For every output element the maximum over `groups` candidate input values
 * is taken; the candidates of one output channel are `groups` consecutive
 * input channels of `featLen` values each.
 *
 * @param[in]  inData     input data, batchSize * size * groups values.
 * @param[out] outData    output data, batchSize * size values.
 * @param[out] idData     for each output element, the index in [0, groups)
 *                        of the candidate that produced the maximum;
 *                        batchSize * size entries.
 * @param[in]  batchSize  batchSize.
 * @param[in]  size       per-sample output length =
 *                        number of output channels * image height * image width.
 * @param[in]  featLen    feature length = image height * image width.
 * @param[in]  groups     number of groups.
 */
extern void hl_maxout_forward(
const real* inData, real* outData, int* idData,
size_t batchSize, size_t size, size_t featLen, size_t groups);
/**
 * @brief MaxOut backward.
 *
 * Each output gradient is added to the single input position that was
 * selected as the maximum in the forward pass. The gradient is accumulated
 * (+=) into inGrad; other positions of inGrad are left untouched.
 *
 * @param[in,out] inGrad     input grad data, batchSize * size * groups values.
 * @param[in]     outGrad    output grad data, batchSize * size values.
 * @param[in]     idData     max ids recorded by the forward pass
 *                           (values in [0, groups)), batchSize * size entries.
 * @param[in]     batchSize  batchSize.
 * @param[in]     size       per-sample output length =
 *                           number of output channels * image height * image width.
 * @param[in]     featLen    feature length = image height * image width.
 * @param[in]     groups     number of groups.
 */
extern void hl_maxout_backward(
real* inGrad, const real* outGrad, const int* idData,
size_t batchSize, size_t size, size_t featLen, size_t groups);
#endif /* HL_CNN_H_ */ #endif /* HL_CNN_H_ */
...@@ -89,4 +89,12 @@ inline void hl_CMRNorm_backward( ...@@ -89,4 +89,12 @@ inline void hl_CMRNorm_backward(
size_t channels, size_t height, size_t width, size_t sizeX, size_t channels, size_t height, size_t width, size_t sizeX,
real alpha, real beta) {} real alpha, real beta) {}
/**
 * No-op stand-in for hl_maxout_forward.
 *
 * Same signature as the real declaration (parameter names kept in sync
 * with it); the body intentionally does nothing.
 * NOTE(review): presumably selected for builds without GPU support --
 * confirm against the including header.
 */
inline void hl_maxout_forward(
    const real* inData, real* outData, int* idData,
    size_t batchSize, size_t size, size_t featLen, size_t groups) {}
inline void hl_maxout_backward(
real* inGrad, const real* outGrad, const int* idData,
size_t batchSize, size_t size, size_t featLen, size_t group) {}
#endif // HL_CNN_STUB_H_ #endif // HL_CNN_STUB_H_
...@@ -531,3 +531,62 @@ void hl_CMRNorm_backward(size_t frameCnt, const real* inV, ...@@ -531,3 +531,62 @@ void hl_CMRNorm_backward(size_t frameCnt, const real* inV,
height, width, sizeX, alpha, beta, inDiff); height, width, sizeX, alpha, beta, inDiff);
CHECK_SYNC("hl_CMRNorm_backward"); CHECK_SYNC("hl_CMRNorm_backward");
} }
/**
 * Maxout forward kernel: one thread per output element.
 *
 * Launch layout: 1-D grid of 1-D blocks covering at least `nthreads`
 * threads; surplus threads exit through the bounds check. No shared memory.
 *
 * @param nthreads  total number of output elements (batch * size).
 * @param inData    input, `groups` candidates per output element; the
 *                  candidates of one output channel are `groups`
 *                  consecutive blocks of `featLen` values.
 * @param outData   output maxima, one per thread.
 * @param idData    index in [0, groups) of the winning candidate.
 * @param size      per-sample output length (output channels * featLen).
 * @param featLen   number of values in one feature map.
 * @param groups    number of candidates per output element.
 */
__global__ void maxoutFpCompute(size_t nthreads, const real * inData,
                                real * outData, int* idData,
                                size_t size, size_t featLen, size_t groups) {
  // Widen before multiplying: blockIdx.x * blockDim.x is a 32-bit product
  // and would wrap (and then be compared as a signed int) for large inputs.
  const size_t index =
      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (index < nthreads) {
    const size_t batch_idx = index / size;
    const size_t i = index % size;
    const size_t channel_idx = i / featLen;
    const size_t feat_idx = i % featLen;
    // Position of candidate 0 for this output element in the input buffer.
    const size_t data_idx =
        (batch_idx * size + channel_idx * featLen) * groups + feat_idx;
    real max = inData[data_idx];
    int maxId = 0;
    // Strict '>' keeps the lowest group index on ties.
    for (size_t g = 1; g < groups; ++g) {
      const real tmp = inData[data_idx + g * featLen];
      if (tmp > max) {
        max = tmp;
        maxId = static_cast<int>(g);
      }
    }
    outData[index] = max;
    idData[index] = maxId;
  }
}
/**
 * Launch the maxout forward kernel on STREAM_DEFAULT.
 *
 * One thread is launched per output element (batchSize * size) in blocks
 * of 1024 threads. Does nothing when there are no output elements.
 *
 * @param inData     device input, batchSize * size * groups values.
 * @param outData    device output, batchSize * size values.
 * @param idData     device buffer receiving the winning group index of
 *                   every output element.
 * @param batchSize  number of samples.
 * @param size       per-sample output length.
 * @param featLen    number of values in one feature map.
 * @param groups     number of candidates per output element.
 */
void hl_maxout_forward(const real* inData, real* outData,
                       int* idData, size_t batchSize, size_t size,
                       size_t featLen, size_t groups) {
  // Keep the element count in size_t; storing it in int silently truncates.
  const size_t num_kernels = size * batchSize;
  if (num_kernels == 0) {
    // A grid with zero blocks is an invalid launch configuration.
    return;
  }
  const size_t kThreadsPerBlock = 1024;
  const size_t blocks =
      (num_kernels + kThreadsPerBlock - 1) / kThreadsPerBlock;
  maxoutFpCompute<<< static_cast<unsigned int>(blocks),
                     static_cast<unsigned int>(kThreadsPerBlock),
                     0, STREAM_DEFAULT >>>(
      num_kernels, inData, outData, idData, size, featLen, groups);
  CHECK_SYNC("hl_maxout_forward failed");
}
/**
 * Maxout backward kernel: one thread per output-gradient element.
 *
 * Launch layout: 1-D grid of 1-D blocks covering at least `nthreads`
 * threads; surplus threads exit through the bounds check. No shared memory.
 * Each thread adds its output gradient to the one input position recorded
 * in idData, so distinct threads write distinct addresses.
 *
 * @param nthreads  total number of output elements (batch * size).
 * @param inGrad    input gradient, accumulated into (+=).
 * @param outGrad   output gradient.
 * @param idData    winning group index per output element from forward.
 * @param size      per-sample output length (output channels * featLen).
 * @param featLen   number of values in one feature map.
 * @param groups    number of candidates per output element.
 */
__global__ void maxoutBpCompute(size_t nthreads, real* inGrad,
                                const real* outGrad, const int* idData,
                                size_t size, size_t featLen, size_t groups) {
  // Widen before multiplying: blockIdx.x * blockDim.x is a 32-bit product
  // and would wrap (and then be compared as a signed int) for large inputs.
  const size_t index =
      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (index < nthreads) {
    const size_t batch_idx = index / size;
    const size_t i = index % size;
    const size_t channel_idx = i / featLen;
    const size_t feat_idx = i % featLen;
    // Offset of this sample in the output-sized buffers.
    const size_t newIndex = batch_idx * size;
    // Offset, inside this sample's input gradient, of the winning candidate.
    const size_t gradIdx =
        (channel_idx * groups + (idData + newIndex)[i]) * featLen + feat_idx;
    (inGrad + newIndex * groups)[gradIdx] += (outGrad + newIndex)[i];
  }
}
/**
 * Launch the maxout backward kernel on STREAM_DEFAULT.
 *
 * One thread is launched per output-gradient element (batchSize * size) in
 * blocks of 1024 threads. Does nothing when there are no elements.
 *
 * @param inGrad     device input gradient, accumulated into.
 * @param outGrad    device output gradient, batchSize * size values.
 * @param idData     winning group index per output element from forward.
 * @param batchSize  number of samples.
 * @param size       per-sample output length.
 * @param featLen    number of values in one feature map.
 * @param groups     number of candidates per output element.
 */
void hl_maxout_backward(real* inGrad, const real* outGrad,
                        const int* idData, size_t batchSize, size_t size,
                        size_t featLen, size_t groups) {
  // Keep the element count in size_t; storing it in int silently truncates.
  const size_t num_kernels = size * batchSize;
  if (num_kernels == 0) {
    // A grid with zero blocks is an invalid launch configuration.
    return;
  }
  const size_t kThreadsPerBlock = 1024;
  const size_t blocks =
      (num_kernels + kThreadsPerBlock - 1) / kThreadsPerBlock;
  maxoutBpCompute<<< static_cast<unsigned int>(blocks),
                     static_cast<unsigned int>(kThreadsPerBlock),
                     0, STREAM_DEFAULT >>>(
      num_kernels, inGrad, outGrad, idData, size, featLen, groups);
  CHECK_SYNC("hl_maxout_backward failed");
}
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MaxOutLayer.h"
#include "hl_gpu.h"
#include "hl_cnn.h"
namespace paddle {
REGISTER_LAYER(maxout, MaxOutLayer);
size_t MaxOutLayer::getSize() {
const MaxOutConfig& maxoutConf = config_.inputs(0).maxout_conf();
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = maxoutConf.img_size_y();
}
if (imgSizeW_ == 0) {
imgSizeW_ = maxoutConf.img_size_x();
}
featLen_ = imgSizeH_ * imgSizeW_;
size_t layerSize = featLen_ * outputChannels_;
getOutput().setFrameHeight(imgSizeH_);
getOutput().setFrameWidth(imgSizeW_);
return layerSize;
}
bool MaxOutLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
/* the size of inputs for maxout-layer is 1 */
CHECK_EQ(config_.inputs_size(), 1UL);
const MaxOutConfig& conf = config_.inputs(0).maxout_conf();
groups_ = conf.groups();
channels_ = conf.channels();
CHECK_EQ(channels_ % groups_, 0UL);
outputChannels_ = channels_ / groups_;
return true;
}
/**
 * Forward pass: size the output and the max-id buffer for the current
 * batch, then let the output matrix compute maxout over the input.
 */
void MaxOutLayer::forward(PassType passType) {
  Layer::forward(passType);

  // Output is (batch, per-sample size); one max id is kept per output value.
  const size_t numSamples = getInput(0).getBatchSize();
  const size_t outWidth = getSize();
  resetOutput(numSamples, outWidth);

  MatrixPtr inValue = getInputValue(0);
  MatrixPtr outValue = getOutputValue();

  IVector::resizeOrCreate(maxoutId_, outWidth * numSamples, useGpu_);
  outValue->maxoutForward(*inValue, *maxoutId_, outputChannels_, groups_);
}
void MaxOutLayer::backward(const UpdateCallback& callback) {
(void)callback;
/* Do derivation */
MatrixPtr inputG = getInputGrad(0);
MatrixPtr outG = getOutputGrad();
if (inputG) {
inputG->maxoutBackward(*outG, *maxoutId_, outputChannels_, groups_);
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * Maxout over the output of a convolution layer.
 *
 * Input:  the output of a conv layer.
 * Output: feature maps of the same spatial size as the input, with
 *         (input channels) / groups channels. The number of input
 *         channels must therefore be divisible by groups.
 *
 * The config file api is maxout_layer.
 */
class MaxOutLayer : public Layer {
protected:
  /// Number of input channels reduced into one output channel.
  size_t groups_;
  /// Feature-map height.
  size_t imgSizeH_;
  /// Feature-map width.
  size_t imgSizeW_;
  /// Number of input channels.
  size_t channels_;
  /// Number of output channels: channels_ / groups_.
  size_t outputChannels_;
  /// Values per feature map: imgSizeH_ * imgSizeW_.
  size_t featLen_;
  /// Max ids recorded by forward() and consumed by backward().
  IVectorPtr maxoutId_;

public:
  explicit MaxOutLayer(const LayerConfig& config) : Layer(config) {}
  virtual ~MaxOutLayer() {}

  /// @return imgSizeH_ * imgSizeW_ * outputChannels_.
  size_t getSize();

  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  void forward(PassType passType);
  void backward(const UpdateCallback& callback = nullptr);
};
} // namespace paddle
...@@ -14,12 +14,15 @@ ...@@ -14,12 +14,15 @@
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
# Note that each config should have its own independent provider
# in the current design of PyDataProvider2.
#######################################################
data = [ data = [
[[[1, 3, 2], [4, 5, 2]], 0], [[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1], [[[0, 2], [2, 5], [0, 1, 2]], 1],
] ]
# Used for sequence_nest_rnn.conf
@provider(input_types=[integer_value_sub_sequence(10), @provider(input_types=[integer_value_sub_sequence(10),
integer_value(3)], integer_value(3)],
should_shuffle=False) should_shuffle=False)
...@@ -27,7 +30,7 @@ def process_subseq(settings, file_name): ...@@ -27,7 +30,7 @@ def process_subseq(settings, file_name):
for d in data: for d in data:
yield d yield d
# Used for sequence_rnn.conf
@provider(input_types=[integer_value_sequence(10), @provider(input_types=[integer_value_sequence(10),
integer_value(3)], integer_value(3)],
should_shuffle=False) should_shuffle=False)
...@@ -38,11 +41,32 @@ def process_seq(settings, file_name): ...@@ -38,11 +41,32 @@ def process_seq(settings, file_name):
seq += subseq seq += subseq
yield seq, d[1] yield seq, d[1]
# Used for sequence_nest_rnn_multi_input.conf
# Yields every entry of the module-level `data` unchanged, i.e. a nested
# (sub-sequence) sample together with its label.
@provider(input_types=[integer_value_sub_sequence(10),
                       integer_value(3)],
          should_shuffle=False)
def process_subseq2(settings, file_name):
    for sample in data:
        yield sample
# Used for sequence_rnn_multi_input.conf
# Same samples as `data`, but with the sub-sequences of each sample
# concatenated into a single flat sequence.
@provider(input_types=[integer_value_sequence(10),
                       integer_value(3)],
          should_shuffle=False)
def process_seq2(settings, file_name):
    for sample in data:
        flat = [token for subseq in sample[0] for token in subseq]
        yield flat, sample[1]
###########################################################
data2 = [ data2 = [
[[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]] ,0], [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]] ,0],
[[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]], 1], [[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]], 1],
] ]
# Used for sequence_nest_rnn_multi_unequalength_inputs.conf
@provider(input_types=[integer_value_sub_sequence(10), @provider(input_types=[integer_value_sub_sequence(10),
integer_value_sub_sequence(10), integer_value_sub_sequence(10),
integer_value(2)], integer_value(2)],
...@@ -52,6 +76,7 @@ def process_unequalength_subseq(settings, file_name): ...@@ -52,6 +76,7 @@ def process_unequalength_subseq(settings, file_name):
yield d yield d
# Used for sequence_rnn_multi_unequalength_inputs.conf
@provider(input_types=[integer_value_sequence(10), @provider(input_types=[integer_value_sequence(10),
integer_value_sequence(10), integer_value_sequence(10),
integer_value(2)], integer_value(2)],
......
...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import * ...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None, test_list=None,
module='rnn_data_provider', module='rnn_data_provider',
obj='process_subseq') obj='process_subseq2')
settings(batch_size=2, learning_rate=0.01) settings(batch_size=2, learning_rate=0.01)
......
...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import * ...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None, test_list=None,
module='rnn_data_provider', module='rnn_data_provider',
obj='process_seq') obj='process_seq2')
settings(batch_size=2, learning_rate=0.01) settings(batch_size=2, learning_rate=0.01)
......
...@@ -307,6 +307,24 @@ TEST(Layer, blockExpandLayer) { ...@@ -307,6 +307,24 @@ TEST(Layer, blockExpandLayer) {
} }
} }
// Gradient check of the maxout layer on CPU and then GPU: one 4096-wide
// input configured as 4 channels of 32x32, reduced with groups = 2.
TEST(Layer, maxoutLayer) {
  TestConfig config;
  config.layerConfig.set_type("maxout");
  config.biasSize = 0;
  config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0});

  MaxOutConfig* maxoutConf =
      config.layerConfig.add_inputs()->mutable_maxout_conf();
  maxoutConf->set_channels(4);
  maxoutConf->set_groups(2);
  maxoutConf->set_img_size_x(32);
  maxoutConf->set_img_size_y(32);

  const bool devices[] = {false, true};
  for (bool useGpu : devices) {
    testLayerGrad(config, "maxout", 10, false, useGpu);
  }
}
void testFcLayer(string format, size_t nnz) { void testFcLayer(string format, size_t nnz) {
TestConfig config; TestConfig config;
config.biasSize = 4096; config.biasSize = 4096;
......
...@@ -583,6 +583,42 @@ void GpuMatrix::colMax(Matrix& max) { ...@@ -583,6 +583,42 @@ void GpuMatrix::colMax(Matrix& max) {
max.maxCols(*this); max.maxCols(*this);
} }
/**
 * Column-wise top-k is not available for GPU matrices: this overload only
 * logs a fatal error. Both arguments are ignored.
 */
void GpuMatrix::colMax(IVector& maxIds, Matrix& maxVal) {
LOG(FATAL) << "Is not supported";
}
/**
 * Maxout forward on the GPU; `this` is the output matrix.
 *
 * @param a         input matrix (must be a GpuMatrix with the same height).
 * @param id        receives the winning group index of every output value
 *                  (must be a GpuIVector).
 * @param channels  number of output channels; the feature length passed to
 *                  the kernel is getWidth() / channels.
 * @param groups    number of candidates per output value.
 */
void GpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels,
                              size_t groups) {
  CHECK(dynamic_cast<GpuMatrix*>(&a));
  CHECK(dynamic_cast<GpuIVector*>(&id));
  CHECK_EQ(a.getHeight(), getHeight());

  const size_t outWidth = getWidth();
  const size_t numSamples = getHeight();
  const size_t featLen = outWidth / channels;

  const real* src = a.getData();
  real* dst = getData();
  int* maxIds = id.getData();

  hl_maxout_forward(src, dst, maxIds, numSamples, outWidth, featLen, groups);
}
/**
 * Maxout backward on the GPU; `this` is the input-gradient matrix.
 *
 * @param a         output-gradient matrix (must be a GpuMatrix with the
 *                  same height).
 * @param id        winning group indices recorded by the forward pass
 *                  (must be a GpuIVector).
 * @param channels  number of output channels; the feature length passed to
 *                  the kernel is a.getWidth() / channels.
 * @param groups    number of candidates per output value.
 */
void GpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels,
                               size_t groups) {
  CHECK(dynamic_cast<GpuMatrix*>(&a));
  CHECK(dynamic_cast<GpuIVector*>(&id));
  CHECK_EQ(a.getHeight(), getHeight());

  const size_t outWidth = a.getWidth();
  const size_t numSamples = getHeight();
  const size_t featLen = outWidth / channels;

  real* inGrad = getData();
  const real* outGrad = a.getData();
  const int* maxIds = id.getData();

  hl_maxout_backward(inGrad, outGrad, maxIds, numSamples, outWidth, featLen,
                     groups);
}
/*calulate the error of classification */ /*calulate the error of classification */
void GpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) { void GpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) {
GpuMatrixPtr output_ptr = std::dynamic_pointer_cast<GpuMatrix>(output); GpuMatrixPtr output_ptr = std::dynamic_pointer_cast<GpuMatrix>(output);
...@@ -2748,6 +2784,95 @@ void CpuMatrix::colMax(Matrix& max) { ...@@ -2748,6 +2784,95 @@ void CpuMatrix::colMax(Matrix& max) {
max.maxCols(*this); max.maxCols(*this);
} }
/**
 * Top-k of every column of this matrix.
 *
 * k ("beam") is the number of rows of maxVal. For column i, the j-th
 * largest value is written to maxVal(j, i) and its row index to
 * maxIds[i + j * width]. Ties are resolved in whatever order
 * std::partial_sort leaves them.
 *
 * Requirements (enforced with CHECK): this matrix is contiguous, both
 * outputs live on the CPU, maxIds holds width * k entries, maxVal has as
 * many columns as this matrix, and k does not exceed the number of rows.
 */
void CpuMatrix::colMax(IVector& maxIds, Matrix& maxVal) {
  CHECK(isContiguous());
  CHECK(!maxIds.useGpu() && !maxVal.useGpu()) << "Matrix type are not equal";
  size_t numSamples = getWidth();
  size_t beam = maxVal.getHeight();
  size_t dim = getHeight();
  CHECK_EQ(maxIds.getSize(), numSamples * beam);
  CHECK_EQ(maxVal.getWidth(), numSamples);
  // partial_sort needs [begin, begin + beam) to lie inside the range;
  // asking for more rows than exist would be undefined behavior.
  CHECK_LE(beam, dim);

  real* a = getData();
  int* s = maxIds.getData();
  real* t = maxVal.getData();

  // One scratch buffer reused for every column instead of reallocating.
  std::vector<std::pair<real, size_t>> vec;
  vec.reserve(dim);
  for (size_t i = 0; i < numSamples; i++) {
    vec.clear();
    for (size_t j = 0; j < dim; j++) {
      vec.push_back(std::pair<real, size_t>(a[i + j * numSamples], j));
    }

    // Largest values first.
    std::partial_sort(
        vec.begin(), vec.begin() + beam, vec.end(),
        [](const std::pair<real, size_t>& l, const std::pair<real, size_t>& r) {
          return l.first > r.first;
        });
    for (size_t j = 0; j < beam; j++) {
      t[i + j * numSamples] = vec[j].first;
      s[i + j * numSamples] = vec[j].second;
    }
  }
}
/**
 * Maxout forward on the CPU; `this` is the output matrix.
 *
 * For every sample the input is rearranged into a scratch matrix with one
 * row per group, and colMax() then picks, per column, the largest value
 * and the row (group) it came from.
 *
 * @param a         input matrix (must be a CpuMatrix with the same height).
 * @param id        receives the winning group index of every output value
 *                  (must be a CpuIVector); written in place, `size` entries
 *                  per sample.
 * @param channels  number of output channels; featLen = getWidth() / channels.
 * @param groups    number of candidates per output value.
 */
void CpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels,
size_t groups) {
CHECK(dynamic_cast<CpuMatrix*>(&a));
CHECK(dynamic_cast<CpuIVector*>(&id));
CHECK_EQ(a.getHeight(), getHeight());
// size: per-sample output width; featLen: values per feature map.
size_t size = getWidth();
size_t batchSize = getHeight();
size_t featLen = size / channels;
const real* input = a.getData();
int* idForCpu = id.getData();
// Scratch buffers, allocated on every call:
// maxInMat holds the candidates (used below as groups rows x size columns),
// maxOutMat holds the per-column maxima (1 row x size columns).
MatrixPtr maxInMat, maxOutMat;
Matrix::resizeOrCreate(maxInMat, groups, size, false, false);
Matrix::resizeOrCreate(maxOutMat, 1, size, false, false);
for (size_t batch_idx = 0; batch_idx < batchSize; ++batch_idx) {
// Offset of this sample in the output-sized buffers.
size_t newIndex = batch_idx * size;
// NOTE(review): presumably a non-owning view over this sample's slice of
// `id`, so colMax() writes the ids straight into the caller's vector --
// confirm against IVector::create.
IVectorPtr tmpId = IVector::create(idForCpu + newIndex, size, false);
for (size_t i = 0; i < channels; ++i) {
// Column offset of output channel i.
size_t newFeatLen = i * featLen;
for (size_t j = 0; j < groups; ++j) {
// Row j, columns of channel i <- the featLen input values of candidate j
// for output channel i (input channel i * groups + j of this sample).
maxInMat->subMatrix(j, j + 1, newFeatLen, newFeatLen + featLen)
->copyFrom(input + (newIndex + newFeatLen) * groups + j * featLen,
featLen);
}
}
// maxOutMat has one row, so this is a top-1 (max + argmax) over the rows.
maxInMat->colMax(*tmpId, *maxOutMat);
this->subRowMatrix(batch_idx, batch_idx + 1)->copyFrom(*maxOutMat);
}
}
/**
 * Maxout backward on the CPU; `this` is the input-gradient matrix.
 *
 * Every output gradient is added to the single input position that won the
 * maximum in the forward pass.
 *
 * @param a         output-gradient matrix (must be a CpuMatrix with the
 *                  same height).
 * @param id        winning group indices recorded by the forward pass
 *                  (must be a CpuIVector).
 * @param channels  number of output channels; featLen = a.getWidth() / channels.
 * @param groups    number of candidates per output value.
 */
void CpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels,
                               size_t groups) {
  CHECK(dynamic_cast<CpuMatrix*>(&a));
  CHECK(dynamic_cast<CpuIVector*>(&id));
  CHECK_EQ(a.getHeight(), getHeight());

  const size_t outWidth = a.getWidth();
  const size_t numSamples = getHeight();
  const size_t featLen = outWidth / channels;
  // Input values belonging to one output channel.
  const size_t groupSpan = groups * featLen;

  real* inGradBase = getData();
  const real* outGradBase = a.getData();
  int* idBase = id.getData();

  for (size_t n = 0; n < numSamples; ++n) {
    const size_t rowOffset = n * outWidth;
    real* inRow = inGradBase + rowOffset * groups;
    const real* outRow = outGradBase + rowOffset;
    int* idRow = idBase + rowOffset;
    for (size_t k = 0; k < outWidth; ++k) {
      const size_t channel = k / featLen;
      const size_t pixel = k % featLen;
      const int target = idRow[k] * featLen + channel * groupSpan + pixel;
      inRow[target] += outRow[k];
    }
  }
}
void CpuMatrix::rowNormalizeL1(Matrix& out) { void CpuMatrix::rowNormalizeL1(Matrix& out) {
CHECK(!out.useGpu()); CHECK(!out.useGpu());
......
...@@ -493,16 +493,40 @@ public: ...@@ -493,16 +493,40 @@ public:
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
/**
* Store the maximum of each column of this matrix into max.
*/
virtual void colMax(Matrix& max) { LOG(FATAL) << "not implemented"; } virtual void colMax(Matrix& max) { LOG(FATAL) << "not implemented"; }
/**
* @brief Get the top k elements of each column of this matrix.
*
* The row ids and values of these elements are stored in
* maxIds and maxVal respectively, where k is the number of rows of
* maxVal (maxIds holds k entries per column).
* Callers should not rely on the order of the top k elements.
*
* This base implementation only logs a fatal error; subclasses that
* support the operation override it.
*/
virtual void colMax(IVector& maxIds, Matrix& maxVal) {
LOG(FATAL) << "not implemented";
}
/**
* @brief Maxout forward; this matrix is the output.
*
* @param a         input matrix.
* @param id        receives, for every output value, the index of the
*                  group that produced the maximum.
* @param channels  number of output channels.
* @param groups    number of groups reduced into one output channel.
*
* This base implementation only logs a fatal error; subclasses that
* support the operation override it.
*/
virtual void maxoutForward(Matrix& a, IVector& id, size_t channels,
size_t groups) {
LOG(FATAL) << "not implemented";
}
/**
* @brief Maxout backward; this matrix is the input gradient.
*
* @param a         output gradient matrix.
* @param id        group indices recorded by maxoutForward.
* @param channels  number of output channels.
* @param groups    number of groups reduced into one output channel.
*
* This base implementation only logs a fatal error; subclasses that
* support the operation override it.
*/
virtual void maxoutBackward(Matrix& a, IVector& id, size_t channels,
size_t groups) {
LOG(FATAL) << "not implemented";
}
virtual void rowMaxId(IVector& maxIds) { LOG(FATAL) << "Not implemented"; } virtual void rowMaxId(IVector& maxIds) { LOG(FATAL) << "Not implemented"; }
/** /**
* @brief Get the top k elements of each row of this matrix. * @brief Get the top k elements of each row of this matrix.
* *
* The column ids and values of these elements are stored in * The column ids and values of these elements are stored in
* maxIds and max respectively. Note that the top k * maxIds and max respectively. where k is the size of maxIds.
* elements are not sorted. * And note that the top k elements are not sorted.
*/ */
virtual void rowMax(IVector& maxIds, Matrix& max) { virtual void rowMax(IVector& maxIds, Matrix& max) {
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
...@@ -1085,6 +1109,9 @@ public: ...@@ -1085,6 +1109,9 @@ public:
void rowMax(Matrix& max); void rowMax(Matrix& max);
void rowMax(IVector& maxIds, Matrix& max); void rowMax(IVector& maxIds, Matrix& max);
void colMax(Matrix& max); void colMax(Matrix& max);
void colMax(IVector& maxIds, Matrix& max);
void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups);
void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups);
void oneHotCrossEntropy(Matrix& output, IVector& label); void oneHotCrossEntropy(Matrix& output, IVector& label);
void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); void oneHotCrossEntropyBp(Matrix& outputV, IVector& label);
...@@ -1395,6 +1422,9 @@ public: ...@@ -1395,6 +1422,9 @@ public:
void rowMax(Matrix& max); void rowMax(Matrix& max);
void rowMax(IVector& maxIds, Matrix& maxVal); void rowMax(IVector& maxIds, Matrix& maxVal);
void colMax(Matrix& max); void colMax(Matrix& max);
void colMax(IVector& maxIds, Matrix& maxVal);
void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups);
void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups);
void rowNormalizeL1(Matrix& out); void rowNormalizeL1(Matrix& out);
void oneHotCrossEntropy(Matrix& output, IVector& label); void oneHotCrossEntropy(Matrix& output, IVector& label);
......
...@@ -1999,6 +1999,78 @@ TEST(Matrix, PoolFwdBwd) { ...@@ -1999,6 +1999,78 @@ TEST(Matrix, PoolFwdBwd) {
} }
} }
/**
 * Run maxout forward and backward on the CPU and GPU matrix
 * implementations with identical inputs and check that they agree.
 *
 * @param numSamples  batch size (matrix height).
 * @param imgSizeH    feature-map height.
 * @param imgSizeW    feature-map width.
 * @param channels    number of input channels.
 * @param groups      number of input channels reduced into one output
 *                    channel; the output has channels / groups channels.
 */
void testMaxOutFwdBwd(int numSamples, int imgSizeH, int imgSizeW,
int channels, int groups) {
int inWidth = imgSizeH * imgSizeW * channels;
int outChannels = channels / groups;
int outWidth = imgSizeH * imgSizeW * outChannels;
// forward
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
// CPU-side copies of the GPU results, used for the comparisons below.
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
IVectorPtr id = CpuIVector::create(numSamples * outWidth, false);
IVectorPtr idGpu = GpuIVector::create(numSamples * outWidth, true);
IVectorPtr idCheck = CpuIVector::create(numSamples * outWidth, false);
// Same random input on both devices.
input->randomizeUniform();
inputGpu->copyFrom(*input);
target->maxoutForward(*input, *id, outChannels, groups);
targetGpu->maxoutForward(*inputGpu, *idGpu, outChannels, groups);
// check
// Both the maxima and the recorded max ids must match.
targetCheck->copyFrom(*targetGpu);
MatrixCheckErr(*target, *targetCheck);
idCheck->copyFrom(*idGpu);
VectorCheckEqual(*id, *idCheck);
// backward
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth, false,
true);
// Despite its name this has the input-gradient shape: it receives the
// GPU input gradient for comparison with the CPU one.
MatrixPtr targetCheckGrad = CpuMatrix::create(numSamples, inWidth, false,
false);
// The input gradient starts from random values on both devices, so the
// comparison also covers the existing content that backward adds onto.
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->maxoutBackward(*targetGrad, *id, outChannels, groups);
inputGpuGrad->maxoutBackward(*targetGpuGrad, *idGpu, outChannels, groups);
// check
targetCheckGrad->copyFrom(*inputGpuGrad);
MatrixCheckErr(*inputGrad, *targetCheckGrad);
}
// Sweep testMaxOutFwdBwd over every combination of batch size, channel
// count, image size and group count listed below.
TEST(Matrix, MaxOutFwdBwd) {
  const int sampleCounts[] = {5, 10};
  const int channelCounts[] = {8, 16};
  const int heights[] = {14, 28};
  const int widths[] = {16, 30};
  const int groupCounts[] = {2, 4};

  for (int numSamples : sampleCounts) {
    for (int channels : channelCounts) {
      for (int imgSizeH : heights) {
        for (int imgSizeW : widths) {
          for (int groups : groupCounts) {
            VLOG(3) << " numSamples=" << numSamples
                    << " channels=" << channels
                    << " imgSizeH=" << imgSizeH
                    << " imgSizeW=" << imgSizeW
                    << " groups=" << groups;
            testMaxOutFwdBwd(numSamples, imgSizeH, imgSizeW, channels, groups);
          }
        }
      }
    }
  }
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
......
...@@ -170,6 +170,15 @@ message BlockExpandConfig { ...@@ -170,6 +170,15 @@ message BlockExpandConfig {
required uint32 img_size_y = 11; required uint32 img_size_y = 11;
} }
// Per-input configuration of a maxout layer.
message MaxOutConfig {
// Number of channels of the input feature map.
required uint32 channels = 1;
// Number of input channels reduced into one output channel;
// channels must be a multiple of groups.
required uint32 groups = 2;
// The size of input feature map.
// The layer uses these values only when the input layer reports a
// frame width/height of 0.
required uint32 img_size_x = 3;
required uint32 img_size_y = 4;
}
message ProjectionConfig { message ProjectionConfig {
required string type = 1; required string type = 1;
required string name = 2; required string name = 2;
...@@ -225,6 +234,7 @@ message LayerInputConfig { ...@@ -225,6 +234,7 @@ message LayerInputConfig {
// If the input layer has multi-output. // If the input layer has multi-output.
// Set the argument name. // Set the argument name.
optional string input_layer_argument = 9; optional string input_layer_argument = 9;
optional MaxOutConfig maxout_conf = 10;
} }
message LayerConfig { message LayerConfig {
......
...@@ -469,6 +469,7 @@ class Input(Cfg): ...@@ -469,6 +469,7 @@ class Input(Cfg):
pool=None, pool=None,
image=None, image=None,
block_expand=None, block_expand=None,
maxout=None,
format=None, format=None,
nnz=None, nnz=None,
is_static=None, is_static=None,
...@@ -785,6 +786,16 @@ class BlockExpand(Cfg): ...@@ -785,6 +786,16 @@ class BlockExpand(Cfg):
output_y = 0): output_y = 0):
self.add_keys(locals()) self.add_keys(locals())
# Input-level configuration holder for a maxout layer.
#   channels:   number of channels of the input feature map.
#   groups:     number of input channels reduced into one output channel.
#   img_size_x: input feature map width  (defaults to 0).
#   img_size_y: input feature map height (defaults to 0).
@config_class
class MaxOut(Cfg):
def __init__(
self,
channels,
groups,
img_size_x = 0,
img_size_y = 0):
# locals() is passed as-is, so the argument names above become the keys
# that are stored; do not introduce other local variables before this call.
self.add_keys(locals())
def DataBase(async_load_data=False, def DataBase(async_load_data=False,
constant_slots=None, constant_slots=None,
data_ratio=1, data_ratio=1,
...@@ -1082,6 +1093,12 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf): ...@@ -1082,6 +1093,12 @@ def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
int(math.ceil((2 * block_expand.padding_y + block_expand.img_size_y \ int(math.ceil((2 * block_expand.padding_y + block_expand.img_size_y \
- block_expand.block_y) / float(block_expand.stride_y))) - block_expand.block_y) / float(block_expand.stride_y)))
def parse_maxout(maxout, input_layer_name, maxout_conf):
    """
    Copy the maxout settings from the python config object into the
    protobuf message.

    :param maxout: source object exposing channels, groups, img_size_x
                   and img_size_y.
    :param input_layer_name: name of the input layer (not used here).
    :param maxout_conf: destination message; the four fields are assigned
                        in place.
    """
    for field in ('channels', 'groups', 'img_size_x', 'img_size_y'):
        setattr(maxout_conf, field, getattr(maxout, field))
# Define an evaluator # Define an evaluator
@config_func @config_func
def Evaluator( def Evaluator(
...@@ -1705,6 +1722,21 @@ class BlockExpandLayer(LayerBase): ...@@ -1705,6 +1722,21 @@ class BlockExpandLayer(LayerBase):
self.set_layer_size(block_expand_conf.block_x * block_expand_conf.block_y self.set_layer_size(block_expand_conf.block_x * block_expand_conf.block_y
* block_expand_conf.channels) * block_expand_conf.channels)
@config_layer('maxout')
class MaxOutLayer(LayerBase):
    """
    Config-side definition of the 'maxout' layer.

    Fills the maxout config of the first input and sets the layer size to
    the input layer size divided by the number of groups.
    """

    def __init__(
            self,
            name,
            inputs,
            **xargs):
        super(MaxOutLayer, self).__init__(name, 'maxout', 0, inputs=inputs, **xargs)
        input_layer = self.get_input_layer(0)
        parse_maxout(self.inputs[0].maxout,
                     input_layer.name,
                     self.config.inputs[0].maxout_conf)
        maxout_conf = self.config.inputs[0].maxout_conf
        # Floor division keeps the layer size an integer even when true
        # division is in effect.
        self.set_layer_size(
            g_layer_map[input_layer.name].size // maxout_conf.groups)
# key: cost type # key: cost type
# value: cost class # value: cost class
g_cost_map = {} g_cost_map = {}
......
...@@ -55,7 +55,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", ...@@ -55,7 +55,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'multi_binary_label_cross_entropy', 'multi_binary_label_cross_entropy',
'rank_cost', 'lambda_cost', 'huber_cost', 'rank_cost', 'lambda_cost', 'huber_cost',
# 'block_expand_layer', # TODO(yuyang18): this layer is not correct # 'block_expand_layer', # TODO(yuyang18): this layer is not correct
'out_prod_layer', 'print_layer' 'maxout_layer', 'out_prod_layer', 'print_layer'
] ]
...@@ -110,6 +110,7 @@ class LayerType(object): ...@@ -110,6 +110,7 @@ class LayerType(object):
SLOPE_INTERCEPT_LAYER = "slope_intercept" SLOPE_INTERCEPT_LAYER = "slope_intercept"
LINEAR_COMBINATION_LAYER = "convex_comb" LINEAR_COMBINATION_LAYER = "convex_comb"
BLOCK_EXPAND = "blockexpand" BLOCK_EXPAND = "blockexpand"
MAXOUT = "maxout"
PRINT_LAYER = "print" PRINT_LAYER = "print"
...@@ -3362,6 +3363,73 @@ def block_expand_layer(input, ...@@ -3362,6 +3363,73 @@ def block_expand_layer(input,
return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input]) return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input])
@wrap_name_default()
@layer_support()
def maxout_layer(input,
                 groups,
                 num_channels=None,
                 size_x=None,
                 size_y=None,
                 name=None,
                 layer_attr=None):
    """
    A layer to do max out on conv layer output.

      - Input: output of a conv layer.
      - Output: feature map size same as input. Channel is
        (input channel) / groups.

    So groups should be larger than 1, and the number of channels should be
    divisible by groups.

    Please refer to Paper:
      - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
      - Multi-digit Number Recognition from Street View \
        Imagery using Deep Convolutional Neural Networks: \
        https://arxiv.org/pdf/1312.6082v4.pdf

    The simple usage is:

    .. code-block:: python

       maxout = maxout_layer(input,
                             num_channels=128,
                             groups=4)

    :param input: The input layer. It must be a conv layer with linear
                  activation.
    :type input: LayerOutput
    :param num_channels: The channel number of input layer. If None will be set
                         automatically from previous output.
    :type num_channels: int|None
    :param groups: The group number of input layer.
    :type groups: int
    :param size_x: conv output width. If None will be set
                   automatically from previous output.
    :type size_x: int|None
    :param size_y: conv output height. If None will be set
                   automatically from previous output.
    :type size_y: int|None
    :param name: The name of this layer. It may be omitted.
    :type name: None|basestring.
    :param layer_attr: Extra Layer attribute.
    :type layer_attr: ExtraLayerAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    assert input.layer_type == LayerType.CONV_LAYER
    assert isinstance(input.activation, LinearActivation)
    assert groups > 1
    if num_channels is None:
        assert input.num_filters is not None
        num_channels = input.num_filters
    assert num_channels % groups == 0

    # Forward an explicitly given image size; when it is left as None the
    # MaxOut defaults apply, exactly as before.
    maxout_args = dict(channels=num_channels, groups=groups)
    if size_x is not None:
        maxout_args['img_size_x'] = size_x
    if size_y is not None:
        maxout_args['img_size_y'] = size_y

    Layer(name=name,
          inputs=Input(input.name,
                       maxout=MaxOut(**maxout_args)),
          type=LayerType.MAXOUT,
          **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(name, LayerType.MAXOUT, parents=[input])
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def ctc_layer(input, label, size=None, name=None, norm_by_times=False, def ctc_layer(input, label, size=None, name=None, norm_by_times=False,
......
...@@ -12,6 +12,7 @@ a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr ...@@ -12,6 +12,7 @@ a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
8bb44e1e5072d0c261572307e7672bda test_grumemory_layer.protostr 8bb44e1e5072d0c261572307e7672bda test_grumemory_layer.protostr
1f3510672dce7a9ed25317fc58579ac7 test_hsigmoid.protostr 1f3510672dce7a9ed25317fc58579ac7 test_hsigmoid.protostr
d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr
6fa59551808ee7012bbd24f757e782d2 test_maxout.protostr
251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr 251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr
e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr
2a75dd33b640c49a8821c2da6e574577 test_rnn_group.protostr 2a75dd33b640c49a8821c2da6e574577 test_rnn_group.protostr
......
...@@ -8,7 +8,8 @@ configs=(test_fc layer_activations projections test_print_layer ...@@ -8,7 +8,8 @@ configs=(test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers util_layers simple_rnn_layers unused_layers test_cost_layers img_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight) test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
test_maxout)
for conf in ${configs[*]} for conf in ${configs[*]}
......
from paddle.trainer_config_helpers import *

# Config used to check the proto generated for maxout_layer:
# data -> conv (linear activation) -> maxout -> max pooling -> fc.
settings(batch_size=1000, learning_rate=1e-5)

image = data_layer(name='data', size=2304)

# maxout_layer requires a conv input with linear activation.
conv_out = img_conv_layer(
    input=image,
    filter_size=3,
    num_channels=1,
    num_filters=16,
    padding=1,
    act=LinearActivation(),
    bias_attr=True)

# 16 conv channels reduced in groups of 2 -> 8 channels for the pooling.
maxout_out = maxout_layer(input=conv_out, num_channels=16, groups=2)

pooled = img_pool_layer(
    input=maxout_out,
    num_channels=8,
    pool_size=2,
    stride=2,
    pool_type=MaxPooling())

fc = fc_layer(input=pooled, size=384, bias_attr=False)

outputs(fc)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册