提交 d4087efc 编写于 作者: D dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into cudnn_wrapper

...@@ -224,4 +224,80 @@ extern void hl_matrix_collect_shared_bias(real* B_d, ...@@ -224,4 +224,80 @@ extern void hl_matrix_collect_shared_bias(real* B_d,
extern void hl_matrix_rotate( extern void hl_matrix_rotate(
real* mat, real* matRot, int dimM, int dimN, bool clockWise); real* mat, real* matRot, int dimM, int dimN, bool clockWise);
/**
 * @brief Matrix vol2Col: unfold a 3D volume into a column matrix.
 *
 * Along every axis the number of filter positions is
 * (size + 2 * padding - filter) / stride + 1.
 *
 * @param[in]  dataSrc   input volume data.
 * @param[in]  channels  number of channels of dataSrc.
 * @param[in]  depth     depth of dataSrc.
 * @param[in]  height    height of dataSrc.
 * @param[in]  width     width of dataSrc.
 * @param[in]  filterD   depth of filter.
 * @param[in]  filterH   height of filter.
 * @param[in]  filterW   width of filter.
 * @param[in]  strideD   stride in the depth.
 * @param[in]  strideH   stride in the height.
 * @param[in]  strideW   stride in the width.
 * @param[in]  paddingD  padding in the depth.
 * @param[in]  paddingH  padding in the height.
 * @param[in]  paddingW  padding in the width.
 * @param[out] dataDst   output column matrix.
 *
 */
extern void hl_matrix_vol2Col(const real* dataSrc,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
real* dataDst);
/**
 * @brief Matrix col2Vol: fold a column matrix back into a 3D volume.
 *
 * Every destination element is updated as
 *   dataDst = alpha * (sum of the matching column entries) + beta * dataDst.
 *
 * @param[in,out] dataDst   output volume; its previous content is blended in
 *                          through beta.
 * @param[in]     channels  number of channels of dataDst.
 * @param[in]     depth     depth of dataDst.
 * @param[in]     height    height of dataDst.
 * @param[in]     width     width of dataDst.
 * @param[in]     filterD   depth of filter.
 * @param[in]     filterH   height of filter.
 * @param[in]     filterW   width of filter.
 * @param[in]     strideD   stride in the depth.
 * @param[in]     strideH   stride in the height.
 * @param[in]     strideW   stride in the width.
 * @param[in]     paddingD  padding in the depth.
 * @param[in]     paddingH  padding in the height.
 * @param[in]     paddingW  padding in the width.
 * @param[in]     dataSrc   input column matrix.
 * @param[in]     alpha     scale applied to the folded column values.
 * @param[in]     beta      scale applied to the previous content of dataDst.
 *
 */
extern void hl_matrix_col2Vol(real* dataDst,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
const real* dataSrc,
real alpha,
real beta);
#endif /* HL_MATRIX_H_ */ #endif /* HL_MATRIX_H_ */
...@@ -99,4 +99,38 @@ inline void hl_matrix_collect_shared_bias(real* B_d, ...@@ -99,4 +99,38 @@ inline void hl_matrix_collect_shared_bias(real* B_d,
inline void hl_matrix_rotate( inline void hl_matrix_rotate(
real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} real* mat, real* matRot, int dimM, int dimN, bool clockWise) {}
// Stub for hl_matrix_vol2Col: same parameter list as the extern declaration,
// but the body is empty, so dataDst is left untouched.
// NOTE(review): presumably compiled in for builds without GPU support --
// confirm against the header that selects this stub.
inline void hl_matrix_vol2Col(const real* dataSrc,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
real* dataDst) {}
// Stub for hl_matrix_col2Vol: same parameter list as the extern declaration,
// but the body is empty, so dataDst is left untouched regardless of
// alpha and beta.
// NOTE(review): presumably compiled in for builds without GPU support --
// confirm against the header that selects this stub.
inline void hl_matrix_col2Vol(real* dataDst,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
const real* dataSrc,
real alpha,
real beta) {}
#endif // HL_MATRIX_STUB_H_ #endif // HL_MATRIX_STUB_H_
...@@ -592,3 +592,204 @@ void hl_matrix_rotate( ...@@ -592,3 +592,204 @@ void hl_matrix_rotate(
mat, matRot, dimM, dimN, clockWise); mat, matRot, dimM, dimN, clockWise);
CHECK_SYNC("hl_matrix_rotate failed"); CHECK_SYNC("hl_matrix_rotate failed");
} }
/**
 * @brief Kernel that unfolds a 3D volume into a column matrix.
 *
 * Launch layout: 1-D grid of 1-D blocks, no shared memory. Work items are
 * distributed with a grid-stride loop, so any grid size yields the same
 * result. Each work item `index` in [0, num_kernels) corresponds to one
 * (channel, d_out, h_out, w_out) filter position and writes the
 * filterD * filterH * filterW values of its window; window elements that fall
 * into the padding are written as 0.
 *
 * @param num_kernels  number of work items
 *                     (channels * depth_col * height_col * width_col).
 * @param dataSrc      input volume, indexed as
 *                     ((c * depth + d) * height + h) * width + w.
 * @param dataDst      output column matrix; a row is one
 *                     (channel, k, i, j) filter offset and a column is one
 *                     (d_out, h_out, w_out) filter position.
 * @param depth, height, width              extent of the input volume.
 * @param filterD, filterH, filterW         extent of the filter.
 * @param strideD, strideH, strideW         filter strides.
 * @param paddingD, paddingH, paddingW      zero padding on each side.
 * @param depth_col, height_col, width_col  number of filter positions per
 *                                          axis.
 */
__global__ void keMatrixVol2Col(int num_kernels,
                                const real* dataSrc,
                                real* dataDst,
                                int depth,
                                int height,
                                int width,
                                int filterD,
                                int filterH,
                                int filterW,
                                int strideD,
                                int strideH,
                                int strideW,
                                int paddingD,
                                int paddingH,
                                int paddingW,
                                int depth_col,
                                int height_col,
                                int width_col) {
  // Distance between two consecutive rows of the column matrix.
  const int colSize = depth_col * height_col * width_col;

  for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels;
       index += blockDim.x * gridDim.x) {
    int w_out = index % width_col;
    int h_out = (index / width_col) % height_col;
    int d_out = (index / width_col / height_col) % depth_col;
    int channel_in = index / width_col / height_col / depth_col;
    int channel_out = channel_in * filterD * filterH * filterW;
    int w_in = w_out * strideW - paddingW;
    int h_in = h_out * strideH - paddingH;
    int d_in = d_out * strideD - paddingD;

    // Per-iteration cursors. The kernel arguments themselves must not be
    // advanced: a thread that executes more than one grid-stride iteration
    // would otherwise start its next work item from an already shifted base.
    real* dstPtr =
        dataDst +
        ((channel_out * depth_col + d_out) * height_col + h_out) * width_col +
        w_out;
    // Kept as an integer offset (it may be negative inside the padding) so
    // that no out-of-range pointer is ever formed; it is only used for
    // indexing after the bounds test below succeeded.
    const int srcBase =
        ((channel_in * depth + d_in) * height + h_in) * width + w_in;

    for (int k = 0; k < filterD; ++k) {
      for (int i = 0; i < filterH; ++i) {
        for (int j = 0; j < filterW; ++j) {
          int d = d_in + k;
          int h = h_in + i;
          int w = w_in + j;
          *dstPtr = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 &&
                     w < width)
                        ? dataSrc[srcBase + (k * height + i) * width + j]
                        : 0;
          dstPtr += colSize;
        }
      }
    }
  }
}
/**
 * @brief Host launcher for keMatrixVol2Col.
 *
 * Derives the number of filter positions along each axis, launches one work
 * item per (channel, d_out, h_out, w_out) position on STREAM_DEFAULT with
 * 512 threads per block, and reports failures through CHECK_SYNC.
 * See the declaration in hl_matrix.h for the meaning of the parameters.
 */
void hl_matrix_vol2Col(const real* dataSrc,
                       int channels,
                       int depth,
                       int height,
                       int width,
                       int filterD,
                       int filterH,
                       int filterW,
                       int strideD,
                       int strideH,
                       int strideW,
                       int paddingD,
                       int paddingH,
                       int paddingW,
                       real* dataDst) {
  // Filter positions per axis: (size + 2 * padding - filter) / stride + 1.
  const int colDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
  const int colHeight = (height + 2 * paddingH - filterH) / strideH + 1;
  const int colWidth = (width + 2 * paddingW - filterW) / strideW + 1;

  const int workItems = channels * colDepth * colHeight * colWidth;
  const int blockSize = 512;
  const int gridSize = DIVUP(workItems, blockSize);

  keMatrixVol2Col<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(workItems,
                                                              dataSrc,
                                                              dataDst,
                                                              depth,
                                                              height,
                                                              width,
                                                              filterD,
                                                              filterH,
                                                              filterW,
                                                              strideD,
                                                              strideH,
                                                              strideW,
                                                              paddingD,
                                                              paddingH,
                                                              paddingW,
                                                              colDepth,
                                                              colHeight,
                                                              colWidth);
  CHECK_SYNC("hl_matrix_vol2Col failed");
}
/**
 * @brief Kernel that folds a column matrix back into a 3D volume.
 *
 * Launch layout: 1-D grid of 1-D blocks, no shared memory, grid-stride loop.
 * Each work item `index` in [0, num_kernels) owns exactly one destination
 * element, sums every column entry that maps onto it and stores
 *   dataDst[index] = alpha * sum + beta * dataDst[index].
 *
 * Index algebra: with kd = d - d_col * strideD (likewise kh, kw) the column
 * entry of filter offset (kd, kh, kw) at position (d_col, h_col, w_col) is
 *   (((c * fD + kd) * fH + kh) * fW + kw) * D_col * H_col * W_col
 *     + (d_col * H_col + h_col) * W_col + w_col.
 * Expanding that expression gives a constant part (`base`) plus one linear
 * coefficient per column coordinate (`stepD`, `stepH`, `stepW`).
 *
 * @param num_kernels  number of destination elements
 *                     (channels * depth * height * width).
 * @param dataDst      destination volume, read and written.
 * @param dataSrc      source column matrix.
 * @param alpha        scale of the folded sum.
 * @param beta         scale of the previous destination value.
 * The remaining parameters describe volume, filter, stride, padding and
 * column extents exactly as in keMatrixVol2Col.
 */
__global__ void keMatrixCol2Vol(int num_kernels,
                                real* dataDst,
                                const real* dataSrc,
                                int depth,
                                int height,
                                int width,
                                int filterD,
                                int filterH,
                                int filterW,
                                int strideD,
                                int strideH,
                                int strideW,
                                int paddingD,
                                int paddingH,
                                int paddingW,
                                int depth_col,
                                int height_col,
                                int width_col,
                                real alpha,
                                real beta) {
  for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels;
       index += blockDim.x * gridDim.x) {
    real accumulated = 0;
    real previous = dataDst[index];

    // Coordinates of this element inside the padded volume, and its channel.
    int wPad = index % width + paddingW;
    int hPad = (index / width) % height + paddingH;
    int dPad = (index / width / height) % depth + paddingD;
    int channel = index / width / height / depth;

    // Half-open ranges of filter positions whose window covers the element.
    int wBegin = (wPad < filterW) ? 0 : (wPad - filterW) / strideW + 1;
    int wEnd = min(wPad / strideW + 1, width_col);
    int hBegin = (hPad < filterH) ? 0 : (hPad - filterH) / strideH + 1;
    int hEnd = min(hPad / strideH + 1, height_col);
    int dBegin = (dPad < filterD) ? 0 : (dPad - filterD) / strideD + 1;
    int dEnd = min(dPad / strideD + 1, depth_col);

    // Constant part and per-coordinate coefficients of the column index.
    int base = (channel * filterD * filterW * filterH +
                dPad * filterW * filterH + hPad * filterW + wPad) *
               depth_col * height_col * width_col;
    int stepD =
        (1 - strideD * filterW * filterH * depth_col) * height_col * width_col;
    int stepH = (1 - strideH * filterW * depth_col * height_col) * width_col;
    int stepW = (1 - strideW * depth_col * height_col * width_col);

    for (int dCol = dBegin; dCol < dEnd; ++dCol) {
      int planeBase = base + dCol * stepD;
      for (int hCol = hBegin; hCol < hEnd; ++hCol) {
        int rowBase = planeBase + hCol * stepH;
        for (int wCol = wBegin; wCol < wEnd; ++wCol) {
          accumulated += dataSrc[rowBase + wCol * stepW];
        }
      }
    }
    dataDst[index] = alpha * accumulated + beta * previous;
  }
}
/**
 * @brief Host launcher for keMatrixCol2Vol.
 *
 * Derives the number of filter positions along each axis, launches one work
 * item per destination element (channels * depth * height * width) on
 * STREAM_DEFAULT with 512 threads per block, and reports failures through
 * CHECK_SYNC. See the declaration in hl_matrix.h for the parameters.
 */
void hl_matrix_col2Vol(real* dataDst,
                       int channels,
                       int depth,
                       int height,
                       int width,
                       int filterD,
                       int filterH,
                       int filterW,
                       int strideD,
                       int strideH,
                       int strideW,
                       int paddingD,
                       int paddingH,
                       int paddingW,
                       const real* dataSrc,
                       real alpha,
                       real beta) {
  // Filter positions per axis: (size + 2 * padding - filter) / stride + 1.
  const int colDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
  const int colHeight = (height + 2 * paddingH - filterH) / strideH + 1;
  const int colWidth = (width + 2 * paddingW - filterW) / strideW + 1;

  const int workItems = channels * depth * height * width;
  const int blockSize = 512;
  const int gridSize = DIVUP(workItems, blockSize);

  keMatrixCol2Vol<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(workItems,
                                                              dataDst,
                                                              dataSrc,
                                                              depth,
                                                              height,
                                                              width,
                                                              filterD,
                                                              filterH,
                                                              filterW,
                                                              strideD,
                                                              strideH,
                                                              strideW,
                                                              paddingD,
                                                              paddingH,
                                                              paddingW,
                                                              colDepth,
                                                              colHeight,
                                                              colWidth,
                                                              alpha,
                                                              beta);
  CHECK_SYNC("hl_matrix_col2Vol failed");
}
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Conv3DLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_LAYER(conv3d, Conv3DLayer);
bool Conv3DLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) {
if (!ConvBaseLayer::init(layerMap, parameterMap)) return false;
int index = 0;
for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf();
M_.push_back(numFilters_ / conf.groups());
K_.push_back(filterPixels_[index] * filterChannels_[index]);
// create a new weight
size_t height, width;
width = filterPixels_[index] * filterChannels_[index];
height = numFilters_;
CHECK_EQ(parameters_[index]->getSize(), width * height);
Weight *w = new Weight(height, width, parameters_[index]);
weights_.emplace_back(w);
++index;
}
if (biasParameter_.get()) {
if (sharedBiases_) {
CHECK_EQ((size_t)numFilters_, biasParameter_->getSize());
biases_ =
std::unique_ptr<Weight>(new Weight(1, numFilters_, biasParameter_));
} else {
biases_ =
std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
}
}
return true;
}
// Recomputes the output extents (outputW_/H_/D_) and the per-input output
// volume N_ from the image, filter, padding and stride settings, publishes
// the frame shape of input 0 on the output argument, and returns the layer
// size (output volume * numFilters_).
//
// All inputs are accumulated into the same output, so every input must
// produce the same size; that common size is what is returned. The size must
// therefore be assigned, not summed: summing doubled the reported size for
// two inputs and made the CHECK below fail for three or more.
size_t Conv3DLayer::getSize() {
  CHECK_NE(inputLayers_.size(), 0UL);
  outputH_.clear();
  outputW_.clear();
  outputD_.clear();
  N_.clear();
  size_t layerSize = 0;
  for (size_t i = 0; i < inputLayers_.size(); ++i) {
    outputW_.push_back(outputSize(
        imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true));
    outputH_.push_back(outputSize(
        imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true));
    outputD_.push_back(outputSize(
        imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true));
    N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]);
    const size_t inputLayerSize = N_[i] * size_t(numFilters_);
    CHECK(layerSize == 0 || inputLayerSize == layerSize);
    layerSize = inputLayerSize;
  }
  getOutput().setFrameHeight(outputH_[0]);
  getOutput().setFrameWidth(outputW_[0]);
  getOutput().setFrameDepth(outputD_[0]);
  return layerSize;
}
// Forward pass: for every input and every sample, unfold the input volume
// into colBuf_ and accumulate weight * colBuf_ into the output, one GEMM per
// filter group. The bias (if any) and the activation are applied afterwards.
void Conv3DLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  int outWidth = getSize();
  resetOutput(batchSize, outWidth);

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    REGISTER_TIMER_INFO("FwdConv3D", getName().c_str());
    const MatrixPtr &inMat = getInputValue(i);
    const MatrixPtr &outMat = getOutputValue();
    // GEMM shape per group: (filtersPerGroup x colRows) * (colRows x outVolume)
    const int filtersPerGroup = M_[i];
    const int outVolume = N_[i];
    const int colRows = K_[i];
    const int groupCount = groups_[i];
    Matrix::resizeOrCreate(
        colBuf_, colRows * groupCount, outVolume, false, useGpu_);
    MatrixPtr weight = weights_[i]->getW();

    for (int sample = 0; sample < batchSize; ++sample) {
      colBuf_->vol2Col(inMat->getData() + sample * inMat->getStride(),
                       channels_[i],
                       imgSizeD_[i],
                       imgSizeH_[i],
                       imgSizeW_[i],
                       filterSizeZ_[i],
                       filterSizeY_[i],
                       filterSize_[i],
                       strideZ_[i],
                       strideY_[i],
                       stride_[i],
                       paddingZ_[i],
                       paddingY_[i],
                       padding_[i]);

      // View of this sample's output row as a (filters x outVolume) matrix.
      real *sampleOut = outMat->getData() + sample * outMat->getStride();
      MatrixPtr sampleOutMat = Matrix::create(
          sampleOut, groupCount * filtersPerGroup, outVolume, false, useGpu_);

      for (int g = 0; g < groupCount; g++) {
        MatrixPtr groupWeight =
            weight->subMatrix(g * filtersPerGroup, filtersPerGroup);
        MatrixPtr groupCols = colBuf_->subMatrix(g * colRows, colRows);
        MatrixPtr groupOut =
            sampleOutMat->subMatrix(g * filtersPerGroup, filtersPerGroup);
        groupOut->mul(*groupWeight, *groupCols, 1.0, 1.0);
      }
    }
  }

  if (nullptr != this->biasParameter_) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    this->addBias();
  }
  forwardActivation();
}
// Backward pass: back-propagates through the activation, then updates the
// bias gradient (when a bias with a gradient buffer exists) and, for every
// input, the weight gradient and the input gradient.
void Conv3DLayer::backward(const UpdateCallback &callback) {
backwardActivation();
// Bias gradient is only computed when the bias has a gradient buffer.
if (biases_ && biases_->getWGrad()) {
bpropBiases();
biases_->getParameterPtr()->incUpdate(callback);
}
for (size_t i = 0; i != inputLayers_.size(); ++i) {
REGISTER_TIMER_INFO("BwdConv3D", getName().c_str());
// Weight gradient is skipped when the weight has no gradient buffer.
if (weights_[i]->getWGrad()) {
bpropWeights(i);
}
// Input gradient is skipped when input i does not request one.
if (getInputGrad(i)) {
bpropData(i);
}
// NOTE(review): both timer macros live in the same loop scope; if they are
// scope-based timers, "BwdConv3D" also covers the weight update -- confirm.
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
// incUpdate is invoked for every input, whether or not a gradient was
// computed above.
weights_[i]->getParameterPtr()->incUpdate(callback);
}
}
void Conv3DLayer::bpropWeights(int i) {
int M = M_[i];
int N = N_[i];
int K = K_[i];
const MatrixPtr &inMat = getInputValue(i);
Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
MatrixPtr wGradMat = weights_[i]->getWGrad();
int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
for (int n = 0; n < batchSize; ++n) {
colBuf_->vol2Col(inMat->getData() + n * inMat->getStride(),
channels_[i],
imgSizeD_[i],
imgSizeH_[i],
imgSizeW_[i],
filterSizeZ_[i],
filterSizeY_[i],
filterSize_[i],
strideZ_[i],
strideY_[i],
stride_[i],
paddingZ_[i],
paddingY_[i],
padding_[i]);
real *outGradData =
getOutputGrad()->getData() + n * getOutputGrad()->getStride();
MatrixPtr outGradSub =
Matrix::create(outGradData, groups_[i] * M, N, false, useGpu_);
for (int g = 0; g < groups_[i]; ++g) {
MatrixPtr inMatSub = colBuf_->subMatrix(g * K, K);
MatrixPtr outG = outGradSub->subMatrix(g * M, M);
MatrixPtr wGradSub = wGradMat->subMatrix(g * M, M);
wGradSub->mul(*outG, *(inMatSub->getTranspose()), 1.0, 1.0);
}
}
}
void Conv3DLayer::bpropData(int i) {
int M = M_[i];
int N = N_[i];
int K = K_[i];
Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
MatrixPtr wMat = weights_[i]->getW();
int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
for (int n = 0; n < batchSize; ++n) {
real *outGradData =
getOutputGrad()->getData() + n * getOutputGrad()->getStride();
real *preGradData =
getInputGrad(i)->getData() + n * getInputGrad(i)->getStride();
MatrixPtr outGradSub =
Matrix::create(outGradData, M * groups_[i], N, false, useGpu_);
for (int g = 0; g < groups_[i]; ++g) {
MatrixPtr wMatSub = wMat->subMatrix(g * M, M);
MatrixPtr outG = outGradSub->subMatrix(g * M, M);
MatrixPtr inGradMatSub = colBuf_->subMatrix(g * K, K);
inGradMatSub->mul(*(wMatSub->getTranspose()), *outG, 1.0, 0.0);
}
colBuf_->col2Vol(preGradData,
channels_[i],
imgSizeD_[i],
imgSizeH_[i],
imgSizeW_[i],
filterSizeZ_[i],
filterSizeY_[i],
filterSize_[i],
strideZ_[i],
strideY_[i],
stride_[i],
paddingZ_[i],
paddingY_[i],
padding_[i],
1.0,
1.0);
}
}
void Conv3DLayer::bpropBiases() {
MatrixPtr outGradMat = getOutputGrad();
if (this->sharedBiases_) {
biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f);
} else {
biases_->getWGrad()->collectBias(*outGradMat, 1.0f);
}
}
void Conv3DLayer::addBias() {
MatrixPtr outMat = getOutputValue();
if (this->sharedBiases_) {
outMat->addSharedBias(*(biases_->getW()), 1.0f);
} else {
outMat->addBias(*(biases_->getW()), 1.0f);
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "ConvBaseLayer.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief A 3D convolution layer derived from ConvBaseLayer.
 * The layer unfolds each input volume into a column matrix (vol2Col) and
 * computes the convolution as matrix multiplications, one per filter group.
 */
class Conv3DLayer : public ConvBaseLayer {
public:
explicit Conv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
~Conv3DLayer() {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void addBias();
void backward(const UpdateCallback& callback);
void bpropBiases();
void bpropData(int i);
void bpropWeights(int i);
size_t getSize();
protected:
// Dimensions of the individual gemms, one entry per input layer.
IntV M_; /// numFilters_ / groups: output filters handled by one group
IntV N_; /// outputD_ * outputH_ * outputW_: output positions per filter
IntV K_; /// filterPixels_ * filterChannels_: column-matrix rows per group
// Column buffer filled by vol2Col and consumed by col2Vol.
MatrixPtr colBuf_;
};
} // namespace paddle
...@@ -38,7 +38,6 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, ...@@ -38,7 +38,6 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
strideY_.push_back(conf.stride_y()); strideY_.push_back(conf.stride_y());
dilationY_.push_back(conf.dilation_y()); dilationY_.push_back(conf.dilation_y());
filterSizeY_.push_back(conf.filter_size_y()); filterSizeY_.push_back(conf.filter_size_y());
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
channels_.push_back(conf.channels()); channels_.push_back(conf.channels());
imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y() imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y()
: conf.img_size()); : conf.img_size());
...@@ -47,31 +46,20 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, ...@@ -47,31 +46,20 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
filterChannels_.push_back(conf.filter_channels()); filterChannels_.push_back(conf.filter_channels());
outputH_.push_back(conf.has_output_y() ? conf.output_y() : conf.output_x()); outputH_.push_back(conf.has_output_y() ? conf.output_y() : conf.output_x());
outputW_.push_back(conf.output_x()); outputW_.push_back(conf.output_x());
paddingZ_.push_back(conf.padding_z());
strideZ_.push_back(conf.stride_z());
filterSizeZ_.push_back(conf.filter_size_z());
imgSizeD_.push_back(conf.img_size_z());
outputD_.push_back(conf.output_z());
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back() *
filterSizeZ_.back());
} }
CHECK(inputLayers_.size() == parameters_.size()); CHECK(inputLayers_.size() == parameters_.size());
for (size_t i = 0; i < inputLayers_.size(); i++) {
size_t height, width;
height = filterPixels_[i] * filterChannels_[i];
width = (!isDeconv_) ? numFilters_ : channels_[i];
// create a new weight
CHECK_EQ(parameters_[i]->getSize(), width * height);
Weight* w = new Weight(height, width, parameters_[i]);
weights_.emplace_back(w);
}
/* initialize the biases_ */ // create new weights_ in derived class
if (biasParameter_.get()) { // create new biases_ in derived class
if (sharedBiases_) {
CHECK_EQ((size_t)numFilters_, biasParameter_->getSize());
biases_ =
std::unique_ptr<Weight>(new Weight(numFilters_, 1, biasParameter_));
} else {
biases_ =
std::unique_ptr<Weight>(new Weight(getSize(), 1, biasParameter_));
}
}
// default caffe model // default caffe model
caffeMode_ = true; caffeMode_ = true;
......
...@@ -62,6 +62,13 @@ protected: ...@@ -62,6 +62,13 @@ protected:
IntV outputH_; IntV outputH_;
/// The spatial dimensions of output feature map width. /// The spatial dimensions of output feature map width.
IntV outputW_; IntV outputW_;
IntV outputD_;
IntV imgSizeD_;
IntV filterSizeZ_;
IntV strideZ_;
IntV paddingZ_;
/// Group size, refer to grouped convolution in /// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the /// Alex Krizhevsky's paper: when group=2, the first half of the
/// filters are only connected to the first half of the input channels, /// filters are only connected to the first half of the input channels,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "CrossEntropyOverBeam.h"
namespace paddle {
void CostForOneSequence::calValidExpandStep() {
validExpansionCount_ = 0;
goldAsExtraPath_ = true;
for (size_t i = 0; i < beams_->expansionCount; ++i) {
real gold = static_cast<real>(beams_->gold[i]);
if (i) {
real* start = beams_->candidateIds[i - 1]->getData();
goldRowIds_[i] = std::count_if(
start,
start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1],
[](const real& val) { return val != -1.; });
} else {
goldRowIds_[i] = 0;
}
real* start =
beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_;
real* findEnd = std::find(start, start + beamSize_, gold);
validExpansionCount_++;
if (start + beamSize_ == findEnd) return;
goldColIds_[i] = findEnd - start;
}
if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false;
}
// Sets up the path bookkeeping for the last valid expansion and returns the
// total number of paths. Every candidate different from -1 is one path; when
// the gold sequence fell off the beam it is appended as one extra, last path.
// Fills pathRowIdsInEachBeam_ (for the last expansion), parentIdsInBeam_ and
// goldIdsInFinalExpansion_.
size_t CostForOneSequence::initLastExpansion() {
  int beamId = validExpansionCount_ - 1;
  const MatrixPtr candidates = beams_->candidateIds[beamId];
  real* ids = candidates->getData();
  size_t rowCount = candidates->getHeight();

  /* count the candidate paths of the last expansion. */
  size_t pathCount = std::count_if(
      ids, ids + rowCount * beamSize_, [](const real& val) {
        return val != -1;
      });

  /*
   * if the gold sequence falls off the beam during search, the gold sequence
   * becomes the last path of all expanded candidates.
   */
  if (goldAsExtraPath_) {
    goldIdsInFinalExpansion_ = pathCount;
    ++pathCount;
  }

  pathRowIdsInEachBeam_.assign(validExpansionCount_,
                               std::vector<int>(pathCount, 0));
  parentIdsInBeam_.assign(pathCount, 0);

  if (!goldAsExtraPath_) {
    // Gold is inside the beam: its path index is the number of candidates
    // that precede it.
    size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId];
    goldIdsInFinalExpansion_ = std::count_if(
        ids, ids + goldOffset, [](const real& val) { return val != -1.; });
  } else {
    /* add gold sequence into the total expansion. */
    pathRowIdsInEachBeam_[beamId].back() =
        beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]);
    parentIdsInBeam_.back() = goldRowIds_[beamId];
  }

  /*
   * TODO(caoying): fix this, store the indices of selected candidate
   * paths into Argument.ids
   */
  size_t pathIdx = 0;
  for (size_t row = 0; row < rowCount; ++row) {
    int basePos = getSeqStartPos(beamId, row);
    for (size_t col = 0; col < beamSize_; ++col) {
      int id = ids[row * beamSize_ + col];
      if (id != -1) {
        pathRowIdsInEachBeam_[beamId][pathIdx] = id + basePos;
        parentIdsInBeam_[pathIdx] = row;
        ++pathIdx;
      }
    }
  }
  return pathCount;
}
// Builds, for every valid expansion, the score row of each path. The last
// expansion is initialized by initLastExpansion(); earlier expansions are
// filled by walking the parent indices backwards.
void CostForOneSequence::constructTotalExpansion() {
  /*
   * construct the entire expanded beam by beginning with the last search
   * in which gold falls off the beam.
   */
  size_t totalPathCount = initLastExpansion();
  // When the gold sequence is an extra path it occupies the last slot and is
  // handled separately below (totalPathCount >= 1 in that case).
  const size_t beamPathCount =
      goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount;

  for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) {
    const MatrixPtr candidates = beams_->candidateIds[beamId];
    real* ids = candidates->getData();

    int lastParentIdInBeam = -1;
    int basePos = -1;
    for (size_t i = 0; i < beamPathCount; ++i) {
      int id = ids[parentIdsInBeam_[i]];
      int parentRowId = parentIdsInBeam_[i] / static_cast<int>(beamSize_);
      // The sequence start is only looked up again when the parent changes.
      if (parentIdsInBeam_[i] != lastParentIdInBeam)
        basePos = getSeqStartPos(beamId, parentRowId);

      pathRowIdsInEachBeam_[beamId][i] = id + basePos;

      lastParentIdInBeam = parentIdsInBeam_[i];
      parentIdsInBeam_[i] = parentRowId;
    }

    // The gold path does not depend on the loop above, so it is set once per
    // beam. Previously this assignment lived inside the loop and was skipped
    // whenever the gold sequence was the only path (zero loop iterations),
    // leaving its row id at 0.
    if (goldAsExtraPath_) {
      pathRowIdsInEachBeam_[beamId][totalPathCount - 1] =
          beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]);
    }
  }
}
// Sums, for every path, the scores selected in each valid expansion, applies
// a softmax over all paths and returns the negative log of the gold path's
// probability. All matrices are created on the CPU.
real CostForOneSequence::globallyNormalizedScore() {
  expandedPathScores_.resize(validExpansionCount_);

  Matrix::resizeOrCreate(
      softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false);
  softmaxOut_->zeroMem();
  // Column-vector view over the softmaxOut_ buffer, used as the accumulator.
  MatrixPtr pathScoreSum = Matrix::create(
      softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);

  for (size_t step = 0; step < validExpansionCount_; ++step) {
    std::vector<int>& rows = pathRowIdsInEachBeam_[step];
    MatrixPtr& stepScores = expandedPathScores_[step];

    Matrix::resizeOrCreate(stepScores, rows.size(), 1, false, false);
    stepScores->zeroMem();

    IVectorPtr rowIds = IVector::create(rows.data(), rows.size(), false);
    stepScores->selectRows(*(beams_->scores[step]), *rowIds);
    pathScoreSum->add(*stepScores);
  }

  softmaxOut_->softmax(*softmaxOut_);
  return -std::log(softmaxOut_->getData()[goldIdsInFinalExpansion_]);
}
// Runs the three forward stages in order -- locate the gold path, build the
// expanded path table, normalize globally -- and returns the resulting cost.
real CostForOneSequence::forward() {
  calValidExpandStep();
  constructTotalExpansion();
  const real cost = globallyNormalizedScore();
  return cost;
}
void CostForOneSequence::backward() {
/*
* when softmax layer is the output layer, and it is combined with
* cross-entropy as cost. The derivate with regard to softmax's input
* is simply:
*
* grad_i = softmax_out_i - target_i,
*
* and here hard label is used.
*/
softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.;
MatrixPtr tmp = Matrix::create(
softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);
for (size_t i = 0; i < validExpansionCount_; ++i) {
IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(),
pathRowIdsInEachBeam_[i].size(),
false);
/*
beams_->scoreGrad[i] has been intialized outside this class, this
class only keeps a pointer pointing to the original input gradients,
so here does not need to allocate or initalize the memory.
*/
tmp->addToRows(*beams_->scoreGrad[i], *rowIds);
}
}
REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam);
// Initializes the layer. The inputs come in triples (candidate scores,
// selected candidates, gold sequence), one triple per beam expansion; the
// per-expansion containers are sized accordingly.
// Returns false when the base class initialization fails.
bool CrossEntropyOverBeam::init(const LayerMap& layerMap,
                                const ParameterMap& parameterMap) {
  /* Initialize the basic parent class; propagate its failure. */
  if (!Layer::init(layerMap, parameterMap)) return false;
  CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number.";

  beamExpanCount_ = inputLayers_.size() / 3;

  candidateScores_.resize(beamExpanCount_);
  candidateScoreGrad_.resize(beamExpanCount_);

  candidateInBeam_.resize(beamExpanCount_);
  goldSequence_.resize(beamExpanCount_);
  gradToInputs_.resize(beamExpanCount_);

  setNeedSequenceInfo(false);
  return true;
}
// Validates the input triples. The first expansion defines batchSize_ and
// beamSize_ and must be a sequence; every later expansion must be a nested
// sequence that agrees with those sizes. Scores must have width 1 and each
// gold input must have batchSize_ entries.
void CrossEntropyOverBeam::checkInputs() {
  batchSize_ = 0;
  for (size_t i = 0; i < beamExpanCount_; ++i) {
    const Argument& scores = getInput(i * 3);
    const Argument& selCandidates = getInput(i * 3 + 1);
    const Argument& goldSeq = getInput(i * 3 + 2);

    if (i == 0) {
      CHECK(scores.hasSeq()) << "input " << i << " "
                             << inputLayers_[i]->getName()
                             << " should be a sequence";
      batchSize_ = scores.getNumSequences();
      beamSize_ = getInputValue(i * 3 + 1)->getWidth();
      CHECK_EQ(batchSize_, selCandidates.getBatchSize());
    } else {
      CHECK(scores.hasSubseq()) << "input " << i << " "
                                << inputLayers_[i * 3]->getName()
                                << " should be a nested sequence";
      CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_);
      CHECK_EQ(scores.getNumSequences(), batchSize_);
      CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize());
    }

    CHECK_EQ(1U, scores.value->getWidth());
    CHECK_EQ(batchSize_, goldSeq.getBatchSize());
  }
}
// Makes every input available on the CPU and computes beamSplitPos_.
// GPU inputs are copied into CPU buffers; CPU inputs are shared (the smart
// pointer is copied, not the data).
// beamSplitPos_[j][i] is the end position (exclusive) of sequence j's rows
// inside expansion i.
void CrossEntropyOverBeam::copyInputsToCpu() {
  // `src` is a const reference, so it cannot be moved from: share it by
  // copying the pointer. (The former std::move(src) silently did the same.)
  auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) {
    if (dynamic_cast<GpuMatrix*>(src.get())) {
      Matrix::resizeOrCreate(
          trg, src->getHeight(), src->getWidth(), false, false);
      trg->copyFrom(*src);
    } else {
      trg = src;
    }
  };

  auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) {
    if (dynamic_cast<GpuIVector*>(src.get())) {
      IVector::resizeOrCreate(trg, src->getSize(), false);
      trg->copyFrom(*src);
    } else {
      trg = src;
    }
  };

  beamSplitPos_.clear();
  beamSplitPos_.resize(batchSize_, std::vector<int>(beamExpanCount_, 0));
  for (size_t i = 0; i < beamExpanCount_; ++i) {
    copyValue(getInputValue(i * 3), candidateScores_[i]);
    copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]);
    copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]);

    if (i) {
      ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions;
      const int* seqStarts = seqInfo->getMutableData(false);
      ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions;
      const int* subSeqStarts = subSeqInfo->getMutableData(false);

      // Count the sub-sequences of every sequence as a running total: when a
      // sub-sequence starts exactly where the next sequence starts, the
      // counter of the next sequence continues from the previous one.
      size_t seqId = 1;
      for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1;
           ++subSeqId) {
        CHECK_LT(seqId, seqInfo->getSize());
        if (subSeqStarts[subSeqId] == seqStarts[seqId]) {
          beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i];
          seqId++;
        }
        beamSplitPos_[seqId - 1][i]++;
      }
    } else {
      // First expansion: exactly one row per sequence.
      for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1;
    }
  }
}
// Splits the batched CPU inputs into one BeamExpansion per sequence. No data
// is copied: every per-sequence matrix/vector is created on top of a pointer
// into the batched buffers.
void CrossEntropyOverBeam::splitBatchBeams() {
beamCosts_.resize(batchSize_);
beamPerSeq_.resize(batchSize_, BeamExpansion(beamExpanCount_));
for (size_t i = 0; i < beamExpanCount_; ++i) {
int* seqStarts =
getInput(i * 3).sequenceStartPositions->getMutableData(false);
int* subSeqStarts = nullptr;
// Number of rows available in expansion i; used for the bounds check below.
int maxLen = 0;
if (i) {
subSeqStarts =
getInput(i * 3).subSequenceStartPositions->getMutableData(false);
maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1;
} else {
// Only reached with i == 0, so getInput(i) is the first score input.
maxLen = getInput(i).sequenceStartPositions->getSize() - 1;
}
for (size_t j = 0; j < batchSize_; ++j) {
// Scores and score gradients of sequence j: the slice
// [seqStarts[j], seqStarts[j + 1]) viewed as a column vector.
beamPerSeq_[j].scores[i] =
Matrix::create(candidateScores_[i]->getData() + seqStarts[j],
seqStarts[j + 1] - seqStarts[j],
1,
false,
false);
beamPerSeq_[j].scoreGrad[i] =
Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j],
seqStarts[j + 1] - seqStarts[j],
1,
false,
false);
// Rows [offset, offset + height) of expansion i belong to sequence j.
int offset = j ? beamSplitPos_[j - 1][i] : 0;
int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0);
CHECK_GE(maxLen, offset + height);
// height + 1 start positions delimit the height rows of this sequence.
beamPerSeq_[j].seqInfo[i] = IVector::create(
(i ? subSeqStarts : seqStarts) + offset, height + 1, false);
beamPerSeq_[j].candidateIds[i] =
Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_,
height,
beamSize_,
false,
false);
beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j];
// NOTE(review): this accepts gold == sequence length; if gold is a
// zero-based position inside the sequence, CHECK_LT may be intended --
// confirm.
CHECK_LE(beamPerSeq_[j].gold[i], seqStarts[j + 1] - seqStarts[j]);
}
}
}
void CrossEntropyOverBeam::resizeOutput() {
Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false);
output_.value->zeroMem();
for (size_t i = 0; i < beamExpanCount_; ++i) {
MatrixPtr inGrad = getInputGrad(i * 3);
if (dynamic_cast<GpuMatrix*>(inGrad.get())) {
Matrix::resizeOrCreate(candidateScoreGrad_[i],
inGrad->getHeight(),
inGrad->getWidth(),
false,
false);
} else {
candidateScoreGrad_[i] = std::move(inGrad);
}
candidateScoreGrad_[i]->zeroMem();
}
}
void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) {
for (size_t i = 0; i < beamExpanCount_; ++i) {
if (dynamic_cast<GpuMatrix*>(getInputGrad(i * 3).get()))
getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]);
if (i == copyCount - 1) break;
}
}
// Forward pass: validates the inputs, brings them to the CPU, prepares the
// output and gradient buffers, splits the batch per sequence, and stores the
// cost of every sequence in the output.
void CrossEntropyOverBeam::forward(PassType passType) {
  Layer::forward(passType);

  checkInputs();
  copyInputsToCpu();
  resizeOutput();
  splitBatchBeams();

  MatrixPtr costs = getOutputValue();
  for (size_t seq = 0; seq < batchSize_; ++seq) {
    // Each cost object gets its own copy of the per-sequence beam data.
    beamCosts_[seq].setData(std::make_shared<BeamExpansion>(beamPerSeq_[seq]),
                            beamSize_);
    costs->getData()[seq] = beamCosts_[seq].forward();
  }
}
// Backward pass: back-propagates every sequence's cost into the CPU score
// gradients, then copies the gradients of that sequence's valid expansions
// back to GPU input gradients. `callback` is not used by this layer.
void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {
  for (size_t seq = 0; seq < batchSize_; ++seq) {
    auto& seqCost = beamCosts_[seq];
    seqCost.backward();
    copyGradToGpu(seqCost.getValidExpansionCount());
  }
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "CrossEntropyOverBeam.h"
#include "Layer.h"
namespace paddle {
/*
 * Holds, for one sequence, the data of every beam search step: one entry per
 * expansion in each of the vectors below.
 */
struct BeamExpansion {
  std::vector<MatrixPtr> scores;        // candidate scores
  std::vector<IVectorPtr> seqInfo;      // start positions of the rows
  std::vector<MatrixPtr> candidateIds;  // selected candidate ids
  std::vector<int> gold;                // gold id
  std::vector<MatrixPtr> scoreGrad;     // gradient of the candidate scores

  size_t expansionCount;  // number of expansions

  // Creates n empty (null / zero) entries in every per-expansion vector.
  explicit BeamExpansion(int n)
      : scores(static_cast<size_t>(n)),
        seqInfo(static_cast<size_t>(n)),
        candidateIds(static_cast<size_t>(n)),
        gold(static_cast<size_t>(n)),
        scoreGrad(static_cast<size_t>(n)),
        expansionCount(n) {}
};
typedef std::shared_ptr<BeamExpansion> BeamExpansionPtr;
/*
 * Computes the cost (forward) and its gradient (backward) for the beam
 * expansions of one single sequence. The expansions to work on are handed
 * over through setData() before forward() is called.
 */
class CostForOneSequence {
public:
  CostForOneSequence()
      : beamSize_(0),
        validExpansionCount_(0),
        goldAsExtraPath_(false),
        goldIdsInFinalExpansion_(0) {}

  /*
   * Attach the beam expansions of a sequence and reset the per-expansion
   * bookkeeping: path scores are dropped, gold row ids are reset to 0 and
   * gold column ids to -1, each sized to the number of expansions.
   */
  void setData(const BeamExpansionPtr bPtr, size_t beamSize) {
    beams_ = bPtr;
    beamSize_ = beamSize;

    expandedPathScores_.clear();
    expandedPathScores_.resize(beams_->expansionCount);

    goldRowIds_.clear();
    goldRowIds_.resize(beams_->expansionCount, 0);
    goldColIds_.clear();
    goldColIds_.resize(beams_->expansionCount, -1);
  }

  /* Number of expansions reported as valid (0 until it has been computed). */
  size_t getValidExpansionCount() const { return validExpansionCount_; }

  real forward();
  void backward();

private:
  void calValidExpandStep();
  void constructTotalExpansion();
  size_t initLastExpansion();
  real globallyNormalizedScore();

  /*
   * Offset of row `rowId` inside expansion `beamId`, relative to the first
   * start position stored for that expansion.
   */
  int getSeqStartPos(size_t beamId, size_t rowId) {
    const size_t offsetCount = beams_->seqInfo[beamId]->getSize();
    // Guard the subtraction below: with an empty offset vector,
    // `offsetCount - 1` would wrap around and let any rowId pass.
    CHECK_GT(offsetCount, 0UL);
    CHECK_GT(offsetCount - 1, rowId);
    int* starts = beams_->seqInfo[beamId]->getData();
    return starts[rowId] - starts[0];
  }

  size_t beamSize_;
  size_t validExpansionCount_;
  bool goldAsExtraPath_;
  std::vector<int> goldRowIds_;
  std::vector<int> goldColIds_;

  BeamExpansionPtr beams_;
  std::vector<std::vector<int>> pathRowIdsInEachBeam_;
  std::vector<int> parentIdsInBeam_;
  size_t goldIdsInFinalExpansion_;

  std::vector<MatrixPtr> expandedPathScores_;
  MatrixPtr softmaxOut_;
};
/*
 * Layer that computes a cross-entropy cost over beam expansions.
 * Inputs are consumed in groups of three per expansion (see the i * 3
 * indexing in the implementation).
 */
class CrossEntropyOverBeam : public Layer {
public:
  // The counters are zero-initialised so that they never hold indeterminate
  // values before the layer has processed its first batch.
  explicit CrossEntropyOverBeam(const LayerConfig& config)
      : Layer(config), beamExpanCount_(0), batchSize_(0), beamSize_(0) {}
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback) override;

private:
  void checkInputs();
  void copyInputsToCpu();
  void resizeOutput();
  void copyGradToGpu(size_t copyCount);
  void splitBatchBeams();

  size_t beamExpanCount_;
  size_t batchSize_;
  size_t beamSize_;

  /*
   * The process of constructing beams is not friendly to GPU. Currently this
   * layer only runs on CPU; if any of its inputs is in GPU memory, it is
   * copied to CPU memory first.
   */
  std::vector<MatrixPtr> candidateScores_;
  std::vector<MatrixPtr> candidateScoreGrad_;
  std::vector<MatrixPtr> candidateInBeam_;
  std::vector<MatrixPtr> gradToInputs_;
  std::vector<IVectorPtr> goldSequence_;
  std::vector<std::vector<int>> beamSplitPos_;

  /*
   * The entire batch of beams is split into one beam per sequence; the
   * result is stored in this member.
   */
  std::vector<BeamExpansion> beamPerSeq_;

  /* beamCosts_ is used to propagate error in one sequence. */
  std::vector<CostForOneSequence> beamCosts_;
};
} // namespace paddle
...@@ -46,8 +46,26 @@ bool CudnnConvBaseLayer::init(const LayerMap &layerMap, ...@@ -46,8 +46,26 @@ bool CudnnConvBaseLayer::init(const LayerMap &layerMap,
projConf_.emplace_back(conf); projConf_.emplace_back(conf);
projections_.emplace_back( projections_.emplace_back(
Projection::create(*projConf_[i], parameters_[i], useGpu_)); Projection::create(*projConf_[i], parameters_[i], useGpu_));
// create a new weight
size_t height, width;
height = filterPixels_[i] * filterChannels_[i];
width = (!isDeconv_) ? numFilters_ : channels_[i];
CHECK_EQ(parameters_[i]->getSize(), width * height);
Weight *w = new Weight(height, width, parameters_[i]);
weights_.emplace_back(w);
} }
if (biasParameter_.get()) {
if (sharedBiases_) {
CHECK_EQ((size_t)numFilters_, biasParameter_->getSize());
biases_ =
std::unique_ptr<Weight>(new Weight(numFilters_, 1, biasParameter_));
} else {
biases_ =
std::unique_ptr<Weight>(new Weight(getSize(), 1, biasParameter_));
}
}
if (biases_.get() && sharedBiases_) { if (biases_.get() && sharedBiases_) {
hl_create_tensor_descriptor(&biasDesc_); hl_create_tensor_descriptor(&biasDesc_);
hl_create_tensor_descriptor(&outputDesc_); hl_create_tensor_descriptor(&outputDesc_);
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DeConv3DLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_LAYER(deconv3d, DeConv3DLayer);
bool DeConv3DLayer::init(const LayerMap &layerMap,
const ParameterMap &parameterMap) {
if (!ConvBaseLayer::init(layerMap, parameterMap)) return false;
// for Deconv, the dimension of Kernel is
// channel * output * depth * height * weigth
// Matrix storage format: (output * depth * height * weigth) x channel
for (int index = 0; index < config_.inputs().size(); ++index) {
M_.push_back(filterChannels_[index]);
K_.push_back(filterPixels_[index] * (numFilters_ / groups_[index]));
// create a new weight
size_t height, width;
height = filterPixels_[index] * numFilters_;
width = filterChannels_[index];
CHECK_EQ(parameters_[index]->getSize(), width * height);
Weight *w = new Weight(height, width, parameters_[index]);
weights_.emplace_back(w);
}
if (biasParameter_.get()) {
if (sharedBiases_) {
CHECK_EQ((size_t)numFilters_, biasParameter_->getSize());
biases_ =
std::unique_ptr<Weight>(new Weight(1, numFilters_, biasParameter_));
} else {
biases_ =
std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
}
}
return true;
}
/*
 * Recompute the per-input output geometry (outputW_/H_/D_), the flattened
 * input volume N_ and output volume NOut_, publish the frame dimensions of
 * the first input on the layer output, and return the accumulated size.
 */
size_t DeConv3DLayer::getSize() {
  CHECK_NE(inputLayers_.size(), 0UL);

  // Every call rebuilds the cached geometry from scratch.
  N_.clear();
  NOut_.clear();
  outputD_.clear();
  outputH_.clear();
  outputW_.clear();

  size_t layerSize = 0;
  const size_t inputCount = inputLayers_.size();
  for (size_t i = 0; i < inputCount; ++i) {
    outputW_.push_back(
        imageSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true));
    outputH_.push_back(imageSize(
        imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true));
    outputD_.push_back(imageSize(
        imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true));

    NOut_.push_back(outputD_.back() * outputH_.back() * outputW_.back());
    N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]);

    // NOTE(review): this compares the *input* volume N_[i] against a size
    // accumulated from *output* volumes NOut_; it is trivially true for a
    // single input. Confirm the intended behaviour for multiple inputs.
    CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize);
    layerSize = layerSize + NOut_[i] * numFilters_;
  }

  getOutput().setFrameHeight(outputH_[0]);
  getOutput().setFrameWidth(outputW_[0]);
  getOutput().setFrameDepth(outputD_[0]);
  return layerSize;
}
/*
 * Forward pass. For every input and every sample, the weight sub-matrix of
 * each group is multiplied with the matching slice of the input into the
 * column buffer; the buffer is then accumulated into the sample's output row
 * by col2Vol. Bias and activation are applied at the end.
 */
void DeConv3DLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  int outWidth = getSize();  // also refreshes outputD_/H_/W_ and N_
  resetOutput(batchSize, outWidth);
  const MatrixPtr outMat = getOutputValue();

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str());
    const MatrixPtr &inMat = getInputValue(i);
    const int gemmM = M_[i];
    const int gemmN = N_[i];
    const int gemmK = K_[i];
    MatrixPtr wMat = weights_[i]->getW();
    Matrix::resizeOrCreate(colBuf_, gemmK * groups_[i], gemmN, false, useGpu_);

    for (int n = 0; n < batchSize; ++n) {
      real *sampleIn = inMat->getData() + n * inMat->getStride();
      for (int g = 0; g < groups_[i]; ++g) {
        // Each group owns a contiguous gemmM x gemmN slice of the sample.
        MatrixPtr inMatSub = Matrix::create(
            sampleIn + g * (gemmM * gemmN), gemmM, gemmN, false, useGpu_);
        MatrixPtr wMatSub = wMat->subMatrix(g * gemmK, gemmK);
        MatrixPtr colBufDataSub = colBuf_->subMatrix(g * gemmK, gemmK);
        colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0);
      }
      // alpha = beta = 1.0: the column buffer is added onto the output row.
      real *sampleOut = outMat->getData() + n * outMat->getStride();
      colBuf_->col2Vol(sampleOut,
                       numFilters_,
                       outputD_[i],
                       outputH_[i],
                       outputW_[i],
                       filterSizeZ_[i],
                       filterSizeY_[i],
                       filterSize_[i],
                       strideZ_[i],
                       strideY_[i],
                       stride_[i],
                       paddingZ_[i],
                       paddingY_[i],
                       padding_[i],
                       1.0,
                       1.0);
    }
  }

  if (nullptr != this->biasParameter_) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    this->addBias();
  }
  forwardActivation();
}
// Backward pass.
// After back-propagating through the activation, the bias gradient is
// collected (when the bias has one). Then, for every input and every sample,
// the output gradient is expanded into the column buffer with vol2Col and used
// to accumulate (a) the weight gradient and (b) the gradient w.r.t. the input.
// The cached geometry (outputD_/H_/W_, N_) is the one computed by getSize().
void DeConv3DLayer::backward(const UpdateCallback &callback) {
backwardActivation();
int batchSize = getOutputGrad()->getHeight();
// Bias gradient first; its parameter update is triggered right away.
if (biases_ && biases_->getWGrad()) {
bpropBiases();
biases_->getParameterPtr()->incUpdate(callback);
}
for (size_t i = 0; i < inputLayers_.size(); ++i) {
// Nothing to do for this input unless a weight gradient exists or the layer
// needs to propagate gradients. Note that incUpdate() below is also skipped
// in that case.
if (weights_[i]->getWGrad() || this->needGradient_) {
int M = M_[i];
int N = N_[i];
int K = K_[i];
REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str());
Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
const MatrixPtr &inMat = getInputValue(i);
for (int n = 0; n < batchSize; ++n) {
// Expand the n-th row of the output gradient into the column buffer.
colBuf_->vol2Col(
getOutputGrad()->getData() + n * getOutputGrad()->getStride(),
numFilters_,
outputD_[i],
outputH_[i],
outputW_[i],
filterSizeZ_[i],
filterSizeY_[i],
filterSize_[i],
strideZ_[i],
strideY_[i],
stride_[i],
paddingZ_[i],
paddingY_[i],
padding_[i]);
// Weight gradient: wGrad += colBuf * input^T, one group at a time.
if (weights_[i]->getWGrad()) {
real *inData = inMat->getData() + n * inMat->getStride();
for (int g = 0; g < groups_[i]; ++g) {
MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
MatrixPtr wGradMatSub =
weights_[i]->getWGrad()->subMatrix(g * K, K);
MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
wGradMatSub->mul(
*colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0);
// advance to the next group's M x N slice of the input
inData += M * N;
}
}
// Input gradient: inGrad += W^T * colBuf, one group at a time.
if (getInputGrad(i)) {
real *preGrad =
getInputGrad(i)->getData() + n * getInputGrad(i)->getStride();
for (int g = 0; g < groups_[i]; ++g) {
MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K);
MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K);
MatrixPtr inGradMatSub =
Matrix::create(preGrad, M, N, false, useGpu_);
inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0);
// advance to the next group's M x N slice of the input gradient
preGrad += M * N;
}
}
}
// This second timer lives in the same scope as "BwdDeConv3D" above.
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
weights_[i]->getParameterPtr()->incUpdate(callback);
}
}
}
// No-op: the weight gradient of input i is computed inline in backward().
void DeConv3DLayer::bpropWeights(int i) {}
// No-op: the gradient w.r.t. input i is computed inline in backward().
void DeConv3DLayer::bpropData(int i) {}
void DeConv3DLayer::bpropBiases() {
const MatrixPtr &outGradMat = getOutputGrad();
if (this->sharedBiases_) {
biases_->getWGrad()->collectSharedBias(*outGradMat, 1.0f);
} else {
biases_->getWGrad()->collectBias(*outGradMat, 1.0f);
}
}
void DeConv3DLayer::addBias() {
MatrixPtr outMat = getOutputValue();
if (this->sharedBiases_) {
outMat->addSharedBias(*(biases_->getW()), 1.0f);
} else {
outMat->addBias(*(biases_->getW()), 1.0f);
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "ConvBaseLayer.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * @brief A subclass of deconvolution3D layer.
 * This layer multiplies the weights with the input into a column buffer and
 * then folds that buffer into the output volume (col2Vol) to calculate the
 * deconvolution3D operation.
 */
class DeConv3DLayer : public ConvBaseLayer {
public:
explicit DeConv3DLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
~DeConv3DLayer() {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void addBias();
void backward(const UpdateCallback& callback);
// Accumulates the bias gradient from the output gradient.
void bpropBiases();
// Empty stubs; backward() computes these gradients inline.
void bpropData(int i);
void bpropWeights(int i);
// Recomputes the cached output geometry and returns the layer size.
size_t getSize();
protected:
// Dimensions for the individual gemms, one entry per input.
IntV M_; /// filterChannels_
IntV N_; /// imgSizeD_ * imgSizeH_ * imgSizeW_
IntV K_; /// filterPixels_ * (numFilters_ / groups_)
IntV NOut_; /// outputD_ * outputH_ * outputW_
// Scratch column buffer, resized to (K * groups) x N before use.
MatrixPtr colBuf_;
};
} // namespace paddle
...@@ -22,12 +22,31 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap, ...@@ -22,12 +22,31 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
/* Initialize the basic convolutional parent class */ /* Initialize the basic convolutional parent class */
ConvBaseLayer::init(layerMap, parameterMap); ConvBaseLayer::init(layerMap, parameterMap);
int index = 0;
for (auto &inputConfig : config_.inputs()) { for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf(); const ConvConfig &conf = inputConfig.conv_conf();
/* Consistent caffe mode for multiple input */ /* Consistent caffe mode for multiple input */
caffeMode_ = conf.caffe_mode(); caffeMode_ = conf.caffe_mode();
}
// create a new weight
size_t height, width;
height = filterPixels_[index] * filterChannels_[index];
width = (!isDeconv_) ? numFilters_ : channels_[index];
CHECK_EQ(parameters_[index]->getSize(), width * height);
Weight *w = new Weight(height, width, parameters_[index]);
weights_.emplace_back(w);
index++;
}
if (biasParameter_.get()) {
if (sharedBiases_) {
CHECK_EQ((size_t)numFilters_, biasParameter_->getSize());
biases_ =
std::unique_ptr<Weight>(new Weight(numFilters_, 1, biasParameter_));
} else {
biases_ =
std::unique_ptr<Weight>(new Weight(getSize(), 1, biasParameter_));
}
}
getOutputSize(); getOutputSize();
return true; return true;
......
...@@ -34,6 +34,13 @@ add_unittest_without_exec(test_CRFLayerGrad ...@@ -34,6 +34,13 @@ add_unittest_without_exec(test_CRFLayerGrad
add_test(NAME test_CRFLayerGrad add_test(NAME test_CRFLayerGrad
COMMAND test_CRFLayerGrad) COMMAND test_CRFLayerGrad)
################ test_CrossEntropyOverBeam ####################
add_unittest_without_exec(test_CrossEntropyOverBeam
test_CrossEntropyOverBeamGrad.cpp
LayerGradUtil.cpp)
add_test(NAME test_CrossEntropyOverBeam
COMMAND test_CrossEntropyOverBeam)
################ test_SeqSliceLayerGrad #################### ################ test_SeqSliceLayerGrad ####################
add_unittest_without_exec(test_SeqSliceLayerGrad add_unittest_without_exec(test_SeqSliceLayerGrad
test_SeqSliceLayerGrad.cpp test_SeqSliceLayerGrad.cpp
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <random>
#include <sstream>
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
const size_t MAX_SEQ_NUM = 23;
const size_t MAX_SEQ_LEN = 50;
const size_t MAX_BEAM_SIZE = 27;
const size_t SEED = (size_t)(time(NULL));
/*
 * Randomly generated test data for one beam expansion step: sequence
 * layout, candidate scores, the indices selected into the beam, and the
 * ground-truth bookkeeping (one entry per sequence).
 */
struct SingleBeamExpansion {
  vector<int> seqStartPos;
  vector<int> subSeqStartPos;
  vector<real> candidateScores;

  // TODO(caoying): store this into Argument.ids
  vector<real> selectedIndices;

  vector<int> groundTruth;
  vector<size_t> inBeam;
  vector<int> rowIdxInBeam;
  vector<int> colIdxInBeam;

  /*
   * Reset the ground-truth bookkeeping to n entries: labels and row/column
   * positions become -1, the in-beam flags become 0.
   */
  void resetGroundTruth(size_t n) {
    groundTruth.assign(n, -1);
    inBeam.assign(n, 0);
    rowIdxInBeam.assign(n, -1);
    colIdxInBeam.assign(n, -1);
  }
};
/* Draw the next rand() value and scale it into the range [0, 1]. */
inline float randFloat() {
  const float drawn = static_cast<float>(rand());
  const float upperBound = static_cast<float>(RAND_MAX);
  return drawn / upperBound;
}
/*
 * Fill numbers[0..n) with samples from a uniform distribution over [0, 1).
 * The engine is default-constructed on every call (it is not seeded from
 * SEED), so each call produces the same sequence.
 */
void genRand(real* numbers, size_t n) {
  default_random_engine engine;
  uniform_real_distribution<real> unit(0.0, 1.0);
  real* const last = numbers + n;
  for (real* cur = numbers; cur != last; ++cur) {
    *cur = unit(engine);
  }
}
/*
 * Sample n distinct values from {0, 1, ..., range - 1} and return them in
 * ascending order. When n equals range, all values are returned without
 * shuffling. Requires range >= n.
 */
vector<real> randSampling(real range, int n) {
  CHECK_GE(range, n);
  vector<real> pool(range);
  iota(pool.begin(), pool.end(), 0.);
  if (range != n) {
    // Keep the first n values of a random permutation, then order them.
    random_shuffle(pool.begin(), pool.end());
    pool.resize(n);
    sort(pool.begin(), pool.end());
  }
  return pool;
}
// Generate the sequence layout and the candidate scores of `curBeam`.
//
// curBeam.seqStartPos / curBeam.subSeqStartPos are rebuilt starting from a
// single 0 entry. How they grow depends on `prevBeam`:
//   * prevBeam has selected indices and is itself nested: every selected
//     (non -1) candidate of the previous beam starts a new sub-sequence of
//     random length in [1, MAX_SEQ_LEN];
//   * prevBeam has selected indices but is not nested: same, with a sequence
//     boundary after every `beamSize` entries of prevBeam.selectedIndices;
//   * prevBeam has no selected indices (first expansion): a random number of
//     sequences is generated, nested when `hasSubseq` is true.
// Finally one score per candidate is generated with genRand().
//
// The random layout depends on the exact order of the rand() calls below, and
// rand() is re-seeded with SEED on every call.
void genCandidateScores(bool hasSubseq,
size_t beamSize,
SingleBeamExpansion& prevBeam,
SingleBeamExpansion& curBeam) {
vector<int>& seqStartPos = curBeam.seqStartPos;
seqStartPos.resize(1, 0);
vector<int>& subSeqStartPos = curBeam.subSeqStartPos;
subSeqStartPos.resize(1, 0);
srand(SEED);
if (prevBeam.selectedIndices.size()) {
if (prevBeam.subSeqStartPos.size() > 1) {
int seqIdx = 1;
// samples in previous beam are nested sequences.
for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) {
for (size_t j = 0; j < beamSize; ++j) {
// the first -1 ends the selected candidates of this row
if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break;
subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) +
subSeqStartPos.back());
}
// the previous sub-sequence closed a sequence: close one here as well
if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) {
seqStartPos.push_back(subSeqStartPos.back());
seqIdx++;
}
}
} else {
// samples in previous beam are plain (non-nested) sequences.
for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) {
// every beamSize entries belong to one sequence of the previous beam
if (i && i % beamSize == 0) {
seqStartPos.push_back(subSeqStartPos.back());
if (i == prevBeam.selectedIndices.size()) break;
}
if (prevBeam.selectedIndices[i] == -1.) continue;
subSeqStartPos.push_back(subSeqStartPos.back() +
(1 + (rand() % MAX_SEQ_LEN)));
}
}
} else {
// the first beam expansion
int seqNum = 1 + (rand() % MAX_SEQ_NUM);
for (int i = 0; i < seqNum; ++i) {
if (hasSubseq) {
// NOTE(review): the loop bound draws a new rand() value on every
// iteration; confirm whether a single draw was intended.
for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j)
subSeqStartPos.push_back(subSeqStartPos.back() +
(1 + (rand() % MAX_SEQ_LEN)));
seqStartPos.push_back(subSeqStartPos.back());
} else {
seqStartPos.push_back(seqStartPos.back() +
(1 + (rand() % MAX_SEQ_LEN)));
}
}
}
// despite its name this is the last start position, i.e. the total number of
// candidates to score.
size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back();
curBeam.candidateScores.resize(totalSeqNum, 0.);
genRand(curBeam.candidateScores.data(), totalSeqNum);
}
void genSelectedIndices(size_t beamSize,
vector<int>& seqStartPos,
vector<real>& selectedIndices) {
size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1);
selectedIndices.resize(selectedIdsCount, -1.);
for (size_t i = 0; i < seqStartPos.size() - 1; ++i) {
int seqLen = seqStartPos[i + 1] - seqStartPos[i];
int n = min(seqLen, static_cast<int>(beamSize));
vector<real> ids = randSampling(seqLen, n);
memcpy(selectedIndices.data() + i * beamSize,
ids.data(),
sizeof(real) * ids.size());
}
}
// Generate random ground-truth labels for every beam expansion.
//
// beamExpansions[0] is a reserved placeholder; real expansions start at
// index 1 and must all contain the same number of sequences. For every
// sequence and expansion this fills:
//   groundTruth  - the gold label,
//   inBeam       - 1 when the gold label is among the selected candidates,
//   rowIdxInBeam - the row of selectedIndices the gold path lives in,
//   colIdxInBeam - its column in that row (-1 when not in the beam).
// Once the gold label has fallen out of the beam, later expansions keep their
// reset values for that sequence.
//
// The generated labels depend on the exact order of the rand() calls below;
// rand() is re-seeded with SEED at the start.
void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
size_t beamSize) {
SingleBeamExpansion& beam = beamExpansions[1];
size_t seqNum = beam.seqStartPos.size() - 1;
for (size_t i = 2; i < beamExpansions.size(); ++i)
CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1);
srand(SEED);
// initialize the first beam.
beam.resetGroundTruth(seqNum);
for (size_t i = 0; i < seqNum; ++i) {
if (randFloat() > 0.5) {
/*
 * With probability 0.5, force the randomly generated label to fall in the
 * beam. Otherwise, when the sequence is relatively long and the beam size
 * is relatively small, the gold sequence falls off the beam in the very
 * first search step.
 */
real* begPos = beam.selectedIndices.data() + i * beamSize;
// pick one of the valid (non -1) entries of this row
beam.colIdxInBeam[i] =
rand() % count_if(begPos, begPos + beamSize, [](const real& val) {
return val != -1.;
});
beam.groundTruth[i] =
beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]];
beam.inBeam[i] = 1;
} else {
// pick any position of the sequence; it may or may not be in the beam
int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]);
beam.groundTruth[i] = label;
real* begPos = beam.selectedIndices.data() + i * beamSize;
real* endPos = begPos + beamSize;
real* lblPos = find(begPos, endPos, real(label));
if (lblPos != endPos) {
beam.inBeam[i] = 1;
beam.colIdxInBeam[i] = lblPos - begPos;
}
}
beam.rowIdxInBeam[i] = i;
}
// iterate over each beam expansions
for (size_t i = 2; i < beamExpansions.size(); ++i) {
SingleBeamExpansion& curBeam = beamExpansions[i];
SingleBeamExpansion& prevBeam = beamExpansions[i - 1];
curBeam.resetGroundTruth(seqNum);
// iterate over each sequence
for (size_t j = 0; j < seqNum; ++j) {
if (!prevBeam.inBeam[j]) continue;
// gold sequence falls in the beam in previous search.
real* begPos = prevBeam.selectedIndices.data();
int offset =
prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j];
// the row in the current beam equals the number of valid (non -1)
// selections that precede the gold one in the previous beam
curBeam.rowIdxInBeam[j] = count_if(
begPos, begPos + offset, [](const real& val) { return val != -1.; });
if (randFloat() > 0.5) {
// force the randomly generated label falls in the beam by chance 0.5.
real* start =
curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize;
int n = rand() % count_if(start, start + beamSize, [](const real& val) {
return val != -1.;
});
curBeam.colIdxInBeam[j] = n;
curBeam.groundTruth[j] = *(start + n);
curBeam.inBeam[j] = 1;
} else {
// pick any position of the gold row's sub-sequence
CHECK_LE(curBeam.rowIdxInBeam[j] + 1,
curBeam.subSeqStartPos.size() - 1);
int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]];
int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1];
CHECK_GT(size_t(end), size_t(start));
int label = rand() % (end - start);
curBeam.groundTruth[j] = label;
real* findBeg =
curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize;
real* lblPos =
find(findBeg, findBeg + beamSize, static_cast<real>(label));
if (lblPos != (findBeg + beamSize)) {
curBeam.inBeam[j] = 1;
curBeam.colIdxInBeam[j] = lblPos - findBeg;
}
}
}
}
}
void genOneBeam(size_t beamSize,
bool hasSubseq,
SingleBeamExpansion& prevBeam,
SingleBeamExpansion& curBeam) {
genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam);
genSelectedIndices(beamSize,
hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos,
curBeam.selectedIndices);
}
void genRandomBeamExpansion(size_t expansionCount,
size_t beamSize,
vector<SingleBeamExpansion>& beamExpansions) {
beamExpansions.clear();
beamExpansions.resize(expansionCount + 1);
// beamExpansions[0] is reserved.
for (size_t i = 1; i <= expansionCount; ++i)
genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]);
genGroundTruth(beamExpansions, beamSize);
}
/*
 * Run the gradient check of the cross_entropy_over_beam layer on the given
 * beam expansions. For every expansion (index 0 is skipped) three inputs are
 * registered, in this order: candidate scores, selected candidates, and the
 * ground-truth labels.
 */
void testCrossEntropyOverBeam(bool useGpu,
                              size_t beamSize,
                              vector<SingleBeamExpansion>& beams) {
  TestConfig config;
  config.layerConfig.set_type("cross_entropy_over_beam");

  size_t seqNum = 0;
  for (size_t i = 1; i < beams.size(); ++i) {
    const SingleBeamExpansion& beam = beams[i];
    // create scores for all the candidates
    MatrixPtr candidateScorePtr =
        Matrix::create(beam.candidateScores.size(), 1, false, false);
    candidateScorePtr->copyFrom(beam.candidateScores.data(),
                                beam.candidateScores.size());

    ostringstream paramName;
    paramName << "candidate_scores_" << i;

    if (beam.subSeqStartPos.size() > 1) {
      seqNum = beam.subSeqStartPos.size() - 1;
      config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
                                  paramName.str(),
                                  candidateScorePtr,
                                  beam.seqStartPos,
                                  beam.subSeqStartPos});
    } else {
      seqNum = beam.seqStartPos.size() - 1;
      config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
                                  paramName.str(),
                                  candidateScorePtr,
                                  beam.seqStartPos});
    }
    config.layerConfig.add_inputs();

    // create indices for the selected candidates
    MatrixPtr selectedCandidates =
        Matrix::create(seqNum, beamSize, false, false);
    selectedCandidates->copyFrom(beam.selectedIndices.data(),
                                 beam.selectedIndices.size());
    // str("") empties the buffer. clear() would only reset the stream's
    // error flags and the next name would be appended to the previous one.
    paramName.str("");
    paramName << "selected_candidates_" << i;
    config.inputDefs.push_back(
        {INPUT_SELF_DEFINE_DATA, paramName.str(), selectedCandidates});
    config.layerConfig.add_inputs();

    // create the ground truth
    paramName.str("");
    paramName << "label_" << i;
    config.inputDefs.push_back(
        {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth});
    config.layerConfig.add_inputs();
  }

  testLayerGrad(
      config, "cross_entropy_over_beam", seqNum, false, useGpu, false);
}
/*
 * Gradient check of cross_entropy_over_beam on three random beam
 * expansions with a random beam size, first without and then with GPU.
 */
TEST(Layer, CrossEntropyOverBeam) {
  LOG(INFO) << "SEED = " << SEED;
  const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE;
  LOG(INFO) << "beamSize = " << beamSize;

  // TODO(caoying): test with random beam expansions.
  const size_t expansionCount = 3;
  vector<SingleBeamExpansion> beams;
  genRandomBeamExpansion(expansionCount, beamSize, beams);

  testCrossEntropyOverBeam(/* useGpu= */ false, beamSize, beams);
  testCrossEntropyOverBeam(/* useGpu= */ true, beamSize, beams);
}
/*
 * Test entry point: initialise Paddle and the device selected by
 * FLAGS_gpu_id, make thread-local random generators use the global seed,
 * seed rand() with SEED, and run all registered tests.
 */
int main(int argc, char** argv) {
  initMain(argc, argv);
  hl_start();
  hl_init(FLAGS_gpu_id);

  FLAGS_thread_local_rand_use_global_seed = true;
  srand(SEED);

  testing::InitGoogleTest(&argc, argv);
  const int status = RUN_ALL_TESTS();
  return status;
}
...@@ -2047,6 +2047,159 @@ TEST(Layer, RowL2NormLayer) { ...@@ -2047,6 +2047,159 @@ TEST(Layer, RowL2NormLayer) {
} }
} }
/*
 * Gradient check for a 3D convolution layer of the given type: 6 filters
 * of size 3x3x3 with stride 2 and no padding, on a 3-channel 9x9x9 input,
 * one group, shared biases. The check runs twice, with batch size 10 and
 * with batch size 2.
 */
void test3DConvLayer(const string& type, bool trans, bool useGpu) {
  // filter geometry
  const int kNumFilters = 6;
  const int kFilterX = 3;
  const int kFilterY = 3;
  const int kFilterZ = 3;
  // input volume
  const int kChannels = 3;
  const int kImageX = 9;
  const int kImageY = 9;
  const int kImageZ = 9;

  TestConfig config;
  config.biasSize = kNumFilters;
  config.layerConfig.set_type(type);
  config.layerConfig.set_num_filters(kNumFilters);
  config.layerConfig.set_partial_sum(1);
  config.layerConfig.set_shared_biases(true);

  // Setting up the convolution input, one axis at a time.
  LayerInputConfig* input = config.layerConfig.add_inputs();
  ConvConfig* conv = input->mutable_conv_conf();
  conv->set_channels(kChannels);

  conv->set_filter_size(kFilterX);
  conv->set_padding(0);
  conv->set_stride(2);
  conv->set_img_size(kImageX);

  conv->set_filter_size_y(kFilterY);
  conv->set_padding_y(0);
  conv->set_stride_y(2);
  conv->set_img_size_y(kImageY);

  conv->set_filter_size_z(kFilterZ);
  conv->set_padding_z(0);
  conv->set_stride_z(2);
  conv->set_img_size_z(kImageZ);

  conv->set_output_x(outputSize(conv->img_size(),
                                conv->filter_size(),
                                conv->padding(),
                                conv->stride(),
                                /* caffeMode */ true));
  conv->set_output_y(outputSize(conv->img_size_y(),
                                conv->filter_size_y(),
                                conv->padding_y(),
                                conv->stride_y(),
                                /* caffeMode */ true));
  conv->set_output_z(outputSize(conv->img_size_z(),
                                conv->filter_size_z(),
                                conv->padding_z(),
                                conv->stride_z(),
                                /* caffeMode */ true));

  config.layerConfig.set_size(conv->output_x() * conv->output_y() *
                              conv->output_z() * kNumFilters);
  conv->set_groups(1);
  conv->set_filter_channels(conv->channels() / conv->groups());

  // input dimension, followed by the number of filter parameters
  config.inputDefs.push_back(
      {INPUT_DATA,
       "layer_0",
       kChannels * kImageX * kImageY * kImageZ,
       conv->filter_channels() * kFilterX * kFilterY * kFilterZ *
           kNumFilters});

  testLayerGrad(config, "conv3D", 10, trans, useGpu);
  // Use small batch_size and useWeight=true to test biasGrad
  testLayerGrad(config, "conv3D", 2, trans, useGpu, true, 0.02);
}
/* Gradient check of conv3d on CPU and, unless built CPU-only, on GPU. */
TEST(Layer, test3DConvLayer) {
  const string layerType = "conv3d";
  test3DConvLayer(layerType, /* trans= */ false, /* useGpu= */ false);
#ifndef PADDLE_ONLY_CPU
  test3DConvLayer(layerType, /* trans= */ false, /* useGpu= */ true);
#endif
}
/*
 * Gradient check for a 3D deconvolution layer of the given type: 6 filters
 * of size 3x3x3 with stride 2 and no padding, on a 3-channel input whose
 * img_size / img_size_y / img_size_z are 4 / 6 / 6, one group, shared
 * biases. The check runs twice, with batch size 10 and with batch size 2.
 *
 * @param type   layer type to instantiate (the caller passes "deconv3d").
 * @param trans  forwarded to testLayerGrad.
 * @param useGpu forwarded to testLayerGrad.
 */
void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
  // filter size
  const int NUM_FILTERS = 6;
  const int FILTER_SIZE = 3;
  const int FILTER_SIZE_Y = 3;
  const int FILTER_SIZE_Z = 3;

  // input image
  const int CHANNELS = 3;
  const int IMAGE_SIZE = 4;
  const int IMAGE_SIZE_Y = 6;
  const int IMAGE_SIZE_Z = 6;

  // Setting up conv-trans layer
  TestConfig config;
  config.biasSize = NUM_FILTERS;
  // Honour the requested layer type instead of hard-coding it.
  config.layerConfig.set_type(type);
  config.layerConfig.set_num_filters(NUM_FILTERS);
  config.layerConfig.set_partial_sum(1);
  config.layerConfig.set_shared_biases(true);

  LayerInputConfig* input = config.layerConfig.add_inputs();
  ConvConfig* conv = input->mutable_conv_conf();
  conv->set_channels(CHANNELS);
  conv->set_filter_size(FILTER_SIZE);
  conv->set_filter_size_y(FILTER_SIZE_Y);
  conv->set_filter_size_z(FILTER_SIZE_Z);
  conv->set_padding(0);
  conv->set_padding_y(0);
  conv->set_padding_z(0);
  conv->set_stride(2);
  conv->set_stride_y(2);
  conv->set_stride_z(2);
  conv->set_img_size(IMAGE_SIZE);
  conv->set_img_size_y(IMAGE_SIZE_Y);
  conv->set_img_size_z(IMAGE_SIZE_Z);
  // For deconvolution the output extent is derived with imageSize().
  conv->set_output_x(imageSize(conv->img_size(),
                               conv->filter_size(),
                               conv->padding(),
                               conv->stride(),
                               true));
  conv->set_output_y(imageSize(conv->img_size_y(),
                               conv->filter_size_y(),
                               conv->padding_y(),
                               conv->stride_y(),
                               true));
  conv->set_output_z(imageSize(conv->img_size_z(),
                               conv->filter_size_z(),
                               conv->padding_z(),
                               conv->stride_z(),
                               true));
  config.layerConfig.set_size(conv->output_x() * conv->output_y() *
                              conv->output_z() * NUM_FILTERS);
  conv->set_groups(1);
  conv->set_filter_channels(conv->channels() / conv->groups());

  // input dimension, followed by the number of filter parameters
  config.inputDefs.push_back(
      {INPUT_DATA,
       "layer_0",
       CHANNELS * IMAGE_SIZE * IMAGE_SIZE_Y * IMAGE_SIZE_Z,
       conv->filter_channels() * FILTER_SIZE * FILTER_SIZE_Y * FILTER_SIZE_Z *
           NUM_FILTERS});

  testLayerGrad(config, "deconv3D", 10, trans, useGpu);
  // Use small batch_size and useWeight=true to test biasGrad
  testLayerGrad(config, "deconv3D", 2, trans, useGpu, true, 0.02);
}
/* Gradient check of deconv3d on CPU and, unless built CPU-only, on GPU. */
TEST(Layer, test3DDeConvLayer) {
  const string layerType = "deconv3d";
  test3DDeConvLayer(layerType, /* trans= */ false, /* useGpu= */ false);
#ifndef PADDLE_ONLY_CPU
  test3DDeConvLayer(layerType, /* trans= */ false, /* useGpu= */ true);
#endif
}
TEST(Layer, ScaleShiftLayer) { TEST(Layer, ScaleShiftLayer) {
const size_t batchSize = 16; const size_t batchSize = 16;
const size_t size = 32; const size_t size = 32;
......
...@@ -1389,6 +1389,72 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { ...@@ -1389,6 +1389,72 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
output_d, grad_d, mat_d, height_, width_); output_d, grad_d, mat_d, height_, width_);
} }
/*
 * Expand the volume at dataSrc into this matrix's buffer by forwarding all
 * geometry arguments unchanged to hl_matrix_vol2Col; this matrix is the
 * destination.
 */
void GpuMatrix::vol2Col(real* dataSrc,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW) {
  real* colData = getData();
  hl_matrix_vol2Col(dataSrc,
                    channels, depth, height, width,
                    filterD, filterH, filterW,
                    strideD, strideH, strideW,
                    paddingD, paddingH, paddingW,
                    colData);
}
/*
 * Fold this matrix's buffer back into the volume at dataDst by forwarding
 * all geometry arguments and the alpha/beta factors unchanged to
 * hl_matrix_col2Vol; this matrix is the source.
 */
void GpuMatrix::col2Vol(real* dataDst,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW,
                        real alpha,
                        real beta) {
  real* colData = getData();
  hl_matrix_col2Vol(dataDst,
                    channels, depth, height, width,
                    filterD, filterH, filterW,
                    strideD, strideH, strideW,
                    paddingD, paddingH, paddingW,
                    colData,
                    alpha, beta);
}
/** /**
* CpuMatrix * CpuMatrix
*/ */
...@@ -3975,6 +4041,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out, ...@@ -3975,6 +4041,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out,
} }
} }
/*
 * Expand a (channels x depth x height x width) volume into this matrix's
 * buffer. Row `row` of the result corresponds to one (channel, filter-d,
 * filter-h, filter-w) combination and holds one value per output position;
 * positions that fall outside the volume (padding) are written as 0.
 */
void CpuMatrix::vol2Col(real* data,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW) {
  real* col = getData();
  const int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
  const int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
  const int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
  const int colRows = channels * filterD * filterH * filterW;

  for (int row = 0; row < colRows; ++row) {
    // Decompose the row index into filter offsets and the source channel.
    const int kw = row % filterW;
    const int kh = (row / filterW) % filterH;
    const int kd = (row / filterW / filterH) % filterD;
    const int srcChannel = row / filterW / filterH / filterD;

    for (int od = 0; od < outDepth; ++od) {
      const int srcD = od * strideD - paddingD + kd;
      for (int oh = 0; oh < outHeight; ++oh) {
        const int srcH = oh * strideH - paddingH + kh;
        for (int ow = 0; ow < outWidth; ++ow) {
          const int srcW = ow * strideW - paddingW + kw;
          const bool inside = srcH >= 0 && srcH < height && srcW >= 0 &&
                              srcW < width && srcD >= 0 && srcD < depth;
          const int dstIdx =
              ((row * outDepth + od) * outHeight + oh) * outWidth + ow;
          col[dstIdx] =
              inside
                  ? data[((srcChannel * depth + srcD) * height + srcH) * width +
                         srcW]
                  : 0;
        }
      }
    }
  }
}
/*
 * Fold this matrix's buffer back into the (channels x depth x height x
 * width) volume at trg. Every column entry that maps to a position inside
 * the volume updates it as
 *   trg = alpha * column_value + beta * trg;
 * entries that map into the padding are skipped.
 */
void CpuMatrix::col2Vol(real* trg,
                        int channels,
                        int depth,
                        int height,
                        int width,
                        int filterD,
                        int filterH,
                        int filterW,
                        int strideD,
                        int strideH,
                        int strideW,
                        int paddingD,
                        int paddingH,
                        int paddingW,
                        real alpha,
                        real beta) {
  real* src = getData();
  const int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
  const int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
  const int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
  const int colRows = channels * filterD * filterH * filterW;

  for (int row = 0; row < colRows; ++row) {
    // Decompose the row index into filter offsets and the target channel.
    const int kw = row % filterW;
    const int kh = (row / filterW) % filterH;
    const int kd = (row / filterW / filterH) % filterD;
    const int dstChannel = row / filterW / filterH / filterD;

    for (int od = 0; od < outDepth; ++od) {
      const int dstD = od * strideD - paddingD + kd;
      for (int oh = 0; oh < outHeight; ++oh) {
        const int dstH = oh * strideH - paddingH + kh;
        for (int ow = 0; ow < outWidth; ++ow) {
          const int dstW = ow * strideW - paddingW + kw;
          const bool inside = dstH >= 0 && dstH < height && dstW >= 0 &&
                              dstW < width && dstD >= 0 && dstD < depth;
          if (!inside) continue;

          const int srcIdx =
              ((row * outDepth + od) * outHeight + oh) * outWidth + ow;
          const int dstIdx =
              ((dstChannel * depth + dstD) * height + dstH) * width + dstW;
          trg[dstIdx] = alpha * src[srcIdx] + beta * trg[dstIdx];
        }
      }
    }
  }
}
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// functions executed via cpu // // functions executed via cpu //
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
......
...@@ -1039,6 +1039,42 @@ public: ...@@ -1039,6 +1039,42 @@ public:
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
} }
/**
 * Convert a 3D volume into a column matrix.
 *
 * Default implementation: unconditionally aborts via LOG(FATAL). The
 * parameters describe the source volume (channels, depth, height, width),
 * the filter extent, the strides and the paddings along depth, height and
 * width.
 */
virtual void vol2Col(real* data,
                     int channels,
                     int depth,
                     int height,
                     int width,
                     int filterD,
                     int filterH,
                     int filterW,
                     int strideD,
                     int strideH,
                     int strideW,
                     int paddingD,
                     int paddingH,
                     int paddingW) {
  // Spelling matches the "Not implemented" message of the sibling stubs.
  LOG(FATAL) << "Not implemented";
}
/**
 * Convert a column matrix back into a 3D volume.
 *
 * Default implementation: unconditionally aborts via LOG(FATAL). The
 * parameters describe the target volume (channels, depth, height, width),
 * the filter extent, the strides and the paddings along depth, height and
 * width, plus the `alpha` / `beta` scaling factors.
 */
virtual void col2Vol(real* trg,
                     int channels,
                     int depth,
                     int height,
                     int width,
                     int filterD,
                     int filterH,
                     int filterW,
                     int strideD,
                     int strideH,
                     int strideW,
                     int paddingD,
                     int paddingH,
                     int paddingW,
                     real alpha,
                     real beta) {
  // Spelling matches the "Not implemented" message of the sibling stubs.
  LOG(FATAL) << "Not implemented";
}
virtual void bilinearForward(const Matrix& in, virtual void bilinearForward(const Matrix& in,
const size_t inImgH, const size_t inImgH,
const size_t inImgW, const size_t inImgW,
...@@ -1374,6 +1410,38 @@ public: ...@@ -1374,6 +1410,38 @@ public:
const real ratioH, const real ratioH,
const real ratioW); const real ratioW);
/**
 * Convert a 3D volume into a column matrix.
 *
 * `data` points to the source volume; channels/depth/height/width give its
 * extent, filterD/H/W the filter extent, strideD/H/W the strides and
 * paddingD/H/W the paddings along depth, height and width.
 * NOTE(review): presumably the result is stored in this matrix - confirm
 * against the implementation.
 */
void vol2Col(real* data,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW);
/**
 * Convert a column matrix back into a 3D volume.
 *
 * `trg` points to the target volume; the geometry parameters have the same
 * meaning as for vol2Col. `alpha` and `beta` are scaling factors.
 * NOTE(review): presumably alpha scales the column data and beta the
 * existing contents of `trg` - confirm against the implementation.
 */
void col2Vol(real* trg,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
real alpha,
real beta);
void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label);
void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label);
...@@ -1715,6 +1783,38 @@ public: ...@@ -1715,6 +1783,38 @@ public:
const real ratioH, const real ratioH,
const real ratioW); const real ratioW);
/**
 * Convert a 3D volume into a column matrix.
 *
 * `data` points to the source volume; channels/depth/height/width give its
 * extent, filterD/H/W the filter extent, strideD/H/W the strides and
 * paddingD/H/W the paddings along depth, height and width.
 * NOTE(review): presumably the result is stored in this matrix - confirm
 * against the implementation.
 */
void vol2Col(real* data,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW);
/**
 * Convert a column matrix back into a 3D volume.
 *
 * `trg` points to the target volume; the geometry parameters have the same
 * meaning as for vol2Col. `alpha` and `beta` are scaling factors.
 * NOTE(review): presumably alpha scales the column data and beta the
 * existing contents of `trg` - confirm against the implementation.
 */
void col2Vol(real* trg,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
real alpha,
real beta);
template <typename ExpressionType> template <typename ExpressionType>
void operator=(const ExpressionType& expr) { void operator=(const ExpressionType& expr) {
TensorCpuApply<real>(*this, expr); TensorCpuApply<real>(*this, expr);
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "TensorCheck.h" #include "TensorCheck.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h" #include "paddle/math/SparseMatrix.h"
#include "paddle/testing/TestUtil.h" #include "paddle/testing/TestUtil.h"
...@@ -1203,4 +1204,105 @@ TEST(Matrix, warpCTC) { ...@@ -1203,4 +1204,105 @@ TEST(Matrix, warpCTC) {
} }
} }
void testMatrixCol2Vol(int depth, int height, int width) {
int channel = 3;
int filterX = 3, filterY = 4, filterZ = 5;
int strideX = 2, strideY = 2, strideZ = 2;
int padX = 1, padY = 1, padZ = 1;
MatrixPtr cpuImage =
std::make_shared<CpuMatrix>(channel, depth * height * width);
MatrixPtr gpuImage =
std::make_shared<GpuMatrix>(channel, depth * height * width);
cpuImage->randomizeUniform();
gpuImage->copyFrom(*cpuImage);
int outD = outputSize(depth, filterZ, padZ, strideZ, true);
int outH = outputSize(height, filterY, padY, strideY, true);
int outW = outputSize(width, filterX, padX, strideX, true);
int colBufHeight = channel * filterZ * filterY * filterX;
int colBufWidth = outD * outH * outW;
MatrixPtr cpuColBuf = std::make_shared<CpuMatrix>(colBufHeight, colBufWidth);
MatrixPtr gpuColBuf = std::make_shared<GpuMatrix>(colBufHeight, colBufWidth);
cpuColBuf->vol2Col(cpuImage->getData(),
channel,
depth,
height,
width,
filterZ,
filterY,
filterX,
strideZ,
strideY,
strideX,
padZ,
padY,
padX);
gpuColBuf->vol2Col(gpuImage->getData(),
channel,
depth,
height,
width,
filterZ,
filterY,
filterX,
strideZ,
strideY,
strideX,
padZ,
padY,
padX);
TensorCheckEqual(*cpuColBuf, *gpuColBuf);
cpuColBuf->randomizeUniform();
gpuColBuf->copyFrom(*cpuColBuf);
cpuColBuf->col2Vol(cpuImage->getData(),
channel,
depth,
height,
width,
filterZ,
filterY,
filterX,
strideZ,
strideY,
strideX,
padZ,
padY,
padX,
1.0,
1.0);
gpuColBuf->col2Vol(gpuImage->getData(),
channel,
depth,
height,
width,
filterZ,
filterY,
filterX,
strideZ,
strideY,
strideX,
padZ,
padY,
padX,
1.0,
1.0);
TensorCheckErr(*cpuImage, *gpuImage);
}
// Runs testMatrixCol2Vol over a grid of volume shapes (depth x height x width).
TEST(Matrix, col2Vol) {
  for (int depth : {9, 16, 64}) {
    for (int height : {9, 11, 128}) {
      for (int width : {9, 32, 128}) {
        VLOG(3) << "depth=" << depth << " height=" << height
                << " width=" << width;
        testMatrixCol2Vol(depth, height, width);
      }
    }
  }
}
///////
#endif #endif
...@@ -186,6 +186,7 @@ void Argument::resizeAndCopyFrom(const Argument& src, ...@@ -186,6 +186,7 @@ void Argument::resizeAndCopyFrom(const Argument& src,
resizeAndCopy(strs, src.strs, useGpu, stream); resizeAndCopy(strs, src.strs, useGpu, stream);
frameWidth = src.frameWidth; frameWidth = src.frameWidth;
frameHeight = src.frameHeight; frameHeight = src.frameHeight;
frameDepth = src.frameDepth;
} }
int32_t Argument::resizeAndCopyFrom(const Argument& src, int32_t Argument::resizeAndCopyFrom(const Argument& src,
...@@ -206,6 +207,7 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, ...@@ -206,6 +207,7 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
dataId = src.dataId; dataId = src.dataId;
frameWidth = src.frameWidth; frameWidth = src.frameWidth;
frameHeight = src.frameHeight; frameHeight = src.frameHeight;
frameDepth = src.frameDepth;
if (!src.sequenceStartPositions) { if (!src.sequenceStartPositions) {
// non-sequence input, copy samples directly // non-sequence input, copy samples directly
...@@ -677,6 +679,7 @@ void Argument::reorganizeSeqInfo( ...@@ -677,6 +679,7 @@ void Argument::reorganizeSeqInfo(
const ICpuGpuVectorPtr subSeqStartPos, const ICpuGpuVectorPtr subSeqStartPos,
std::vector<std::vector<int>>& reorganizedSeqInfo) { std::vector<std::vector<int>>& reorganizedSeqInfo) {
CHECK(seqStartPos); CHECK(seqStartPos);
reorganizedSeqInfo.clear();
int seqNum = seqStartPos->getSize() - 1; int seqNum = seqStartPos->getSize() - 1;
int* seqStarts = seqStartPos->getMutableData(false); int* seqStarts = seqStartPos->getMutableData(false);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -35,6 +32,7 @@ struct Argument { ...@@ -35,6 +32,7 @@ struct Argument {
strs(nullptr), strs(nullptr),
frameHeight(0), frameHeight(0),
frameWidth(0), frameWidth(0),
frameDepth(0),
sequenceStartPositions(nullptr), sequenceStartPositions(nullptr),
subSequenceStartPositions(nullptr), subSequenceStartPositions(nullptr),
cpuSequenceDims(nullptr), cpuSequenceDims(nullptr),
...@@ -64,6 +62,7 @@ struct Argument { ...@@ -64,6 +62,7 @@ struct Argument {
allCount = argument.allCount; allCount = argument.allCount;
frameHeight = argument.frameHeight; frameHeight = argument.frameHeight;
frameWidth = argument.frameWidth; frameWidth = argument.frameWidth;
frameDepth = argument.frameDepth;
dataId = argument.dataId; dataId = argument.dataId;
} }
...@@ -76,6 +75,7 @@ struct Argument { ...@@ -76,6 +75,7 @@ struct Argument {
// A dataBatch includes batchSize frames, one frame maybe not only vector // A dataBatch includes batchSize frames, one frame maybe not only vector
size_t frameHeight; size_t frameHeight;
size_t frameWidth; size_t frameWidth;
size_t frameDepth;
// If NULL, each position is treated independently. // If NULL, each position is treated independently.
// Otherwise, its size should be #NumberOfSequences + 1. // Otherwise, its size should be #NumberOfSequences + 1.
...@@ -136,8 +136,10 @@ struct Argument { ...@@ -136,8 +136,10 @@ struct Argument {
} }
size_t getFrameHeight() const { return frameHeight; } size_t getFrameHeight() const { return frameHeight; }
size_t getFrameWidth() const { return frameWidth; } size_t getFrameWidth() const { return frameWidth; }
size_t getFrameDepth() const { return frameDepth; }
void setFrameHeight(size_t h) { frameHeight = h; } void setFrameHeight(size_t h) { frameHeight = h; }
void setFrameWidth(size_t w) { frameWidth = w; } void setFrameWidth(size_t w) { frameWidth = w; }
void setFrameDepth(size_t d) { frameDepth = d; }
int64_t getNumSequences() const { int64_t getNumSequences() const {
return sequenceStartPositions ? sequenceStartPositions->getSize() - 1 return sequenceStartPositions ? sequenceStartPositions->getSize() - 1
......
...@@ -22,7 +22,6 @@ limitations under the License. */ ...@@ -22,7 +22,6 @@ limitations under the License. */
#include <arpa/inet.h> #include <arpa/inet.h>
#include <net/if.h> #include <net/if.h>
#include <net/if_arp.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sstream> #include <sstream>
......
...@@ -85,6 +85,12 @@ message ConvConfig { ...@@ -85,6 +85,12 @@ message ConvConfig {
optional uint32 dilation = 15 [ default = 1 ]; optional uint32 dilation = 15 [ default = 1 ];
optional uint32 dilation_y = 16 [ default = 1 ]; optional uint32 dilation_y = 16 [ default = 1 ];
optional uint32 filter_size_z = 17 [ default = 1 ];
optional uint32 padding_z = 18 [ default = 1 ];
optional uint32 stride_z = 19 [ default = 1 ];
optional uint32 output_z = 20 [ default = 1 ];
optional uint32 img_size_z = 21 [ default = 1 ];
} }
message PoolConfig { message PoolConfig {
...@@ -502,6 +508,8 @@ message LayerConfig { ...@@ -502,6 +508,8 @@ message LayerConfig {
// for HuberRegressionLoss // for HuberRegressionLoss
optional double delta = 57 [ default = 1.0 ]; optional double delta = 57 [ default = 1.0 ];
optional uint64 depth = 58 [ default = 1 ];
} }
message EvaluatorConfig { message EvaluatorConfig {
......
...@@ -886,6 +886,36 @@ class Conv(Cfg): ...@@ -886,6 +886,36 @@ class Conv(Cfg):
config_assert(output_x <= 0) config_assert(output_x <= 0)
# please refer to the comments in proto/ModelConfig.proto
@config_class
class Conv3D(Cfg):
    """
    Configuration holder for a 3D convolution input.

    The ``*_y`` and ``*_z`` values default to their x-axis counterpart
    (``filter_size``, ``padding``, ``stride``) when they are not given.
    """

    def __init__(self,
                 filter_size,
                 channels,
                 padding=None,
                 stride=None,
                 groups=None,
                 filter_channels=None,
                 output_x=None,
                 img_size=None,
                 caffe_mode=True,
                 filter_size_y=None,
                 padding_y=None,
                 stride_y=None,
                 filter_size_z=None,
                 padding_z=None,
                 stride_z=None):
        self.add_keys(locals())

        # A filter size or stride of 0 is never meaningful, so any falsy
        # value is treated as "not specified" and falls back to the x value.
        self.filter_size_y = filter_size_y if filter_size_y else filter_size
        self.filter_size_z = filter_size_z if filter_size_z else filter_size

        # A padding of 0 is a legitimate explicit choice: only fall back to
        # the x padding when the value was really omitted (None). A plain
        # truthiness test would silently turn padding_y=0 into `padding`.
        self.padding_y = padding if padding_y is None else padding_y
        self.padding_z = padding if padding_z is None else padding_z

        self.stride_y = stride_y if stride_y else stride
        self.stride_z = stride_z if stride_z else stride

        if output_x is not None:
            config_assert(output_x <= 0)
@config_class @config_class
class BilinearInterp(Cfg): class BilinearInterp(Cfg):
def __init__(self, out_size_x=None, out_size_y=None, channels=None): def __init__(self, out_size_x=None, out_size_y=None, channels=None):
...@@ -1172,6 +1202,20 @@ def get_img_size(input_layer_name, channels): ...@@ -1172,6 +1202,20 @@ def get_img_size(input_layer_name, channels):
return img_size, img_size_y return img_size, img_size_y
def get_img3d_size(input_layer_name, channels):
    """
    Look up the 3D image geometry of a registered layer.

    Reads width, height and depth from the layer stored in ``g_layer_map``
    under ``input_layer_name`` and asserts that their product equals the
    layer size divided by ``channels``.

    :return: tuple ``(width, height, depth)`` of the input layer.
    """
    layer = g_layer_map[input_layer_name]
    pixels_per_channel = layer.size / channels
    size_x, size_y, size_z = layer.width, layer.height, layer.depth
    config_assert(
        size_x * size_y * size_z == pixels_per_channel,
        "Input layer %s: Incorrect input image size %d * %d * %d for input image pixels %d"
        % (input_layer_name, size_x, size_y, size_z, pixels_per_channel))
    return size_x, size_y, size_z
def parse_bilinear(bilinear, input_layer_name, bilinear_conf): def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
parse_image(bilinear, input_layer_name, bilinear_conf.image_conf) parse_image(bilinear, input_layer_name, bilinear_conf.image_conf)
bilinear_conf.out_size_x = bilinear.out_size_x bilinear_conf.out_size_x = bilinear.out_size_x
...@@ -1282,6 +1326,50 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): ...@@ -1282,6 +1326,50 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.stride_y, conv_conf.caffe_mode) conv_conf.stride_y, conv_conf.caffe_mode)
# caffe_mode: compute the output size using floor instead of ceil,
# which is consistent with the convention used by Caffe and cuDNN.
def parse_conv3d(conv, input_layer_name, conv_conf, num_filters, trans=False):
    """
    Fill ``conv_conf`` from a 3D convolution description ``conv``.

    Filter sizes, paddings, strides, channels, groups and caffe_mode are
    copied over. For a regular convolution (``trans=False``) the image size
    comes from the input layer and the output size is derived with
    ``cnn_output_size``; for a transposed convolution (``trans=True``) the
    input layer provides the output size and the image size is derived with
    ``cnn_image_size``.
    """
    # Fields copied verbatim from the layer description into the proto.
    copied_fields = ('filter_size', 'filter_size_y', 'filter_size_z',
                     'channels', 'padding', 'padding_y', 'padding_z', 'stride',
                     'stride_y', 'stride_z', 'groups', 'caffe_mode')
    for field in copied_fields:
        setattr(conv_conf, field, getattr(conv, field))

    # Per-axis field names: (output, image size, filter size, padding, stride).
    axes = (('output_x', 'img_size', 'filter_size', 'padding', 'stride'),
            ('output_y', 'img_size_y', 'filter_size_y', 'padding_y',
             'stride_y'),
            ('output_z', 'img_size_z', 'filter_size_z', 'padding_z',
             'stride_z'))

    if trans:
        conv_conf.filter_channels = num_filters / conv.groups
        conv_conf.output_x, conv_conf.output_y, conv_conf.output_z = \
            get_img3d_size(input_layer_name, conv.channels)
        for out_f, img_f, filter_f, pad_f, stride_f in axes:
            setattr(conv_conf, img_f,
                    cnn_image_size(
                        getattr(conv_conf, out_f),
                        getattr(conv_conf, filter_f),
                        getattr(conv_conf, pad_f),
                        getattr(conv_conf, stride_f), conv_conf.caffe_mode))
    else:
        conv_conf.filter_channels = conv.channels / conv.groups
        conv_conf.img_size, conv_conf.img_size_y, conv_conf.img_size_z = \
            get_img3d_size(input_layer_name, conv.channels)
        for out_f, img_f, filter_f, pad_f, stride_f in axes:
            setattr(conv_conf, out_f,
                    cnn_output_size(
                        getattr(conv_conf, img_f),
                        getattr(conv_conf, filter_f),
                        getattr(conv_conf, pad_f),
                        getattr(conv_conf, stride_f), conv_conf.caffe_mode))
def parse_block_expand(block_expand, input_layer_name, block_expand_conf): def parse_block_expand(block_expand, input_layer_name, block_expand_conf):
block_expand_conf.channels = block_expand.channels block_expand_conf.channels = block_expand.channels
block_expand_conf.stride_x = block_expand.stride_x block_expand_conf.stride_x = block_expand.stride_x
...@@ -1585,6 +1673,9 @@ class LayerBase(object): ...@@ -1585,6 +1673,9 @@ class LayerBase(object):
self.config.height = height self.config.height = height
self.config.width = width self.config.width = width
def set_layer_depth(self, depth):
    """Store ``depth`` in this layer's config (``self.config.depth``)."""
    self.config.depth = depth
def set_cnn_layer(self, def set_cnn_layer(self,
input_layer_name, input_layer_name,
height, height,
...@@ -1607,6 +1698,21 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): ...@@ -1607,6 +1698,21 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha
@config_layer('cross_entropy_over_beam')
class CrossEntropyOverBeamLayer(LayerBase):
    """
    Config for the 'cross_entropy_over_beam' layer.

    The inputs are consumed in groups of three; the first input of every
    group must have size 1.
    """

    def __init__(self, name, inputs, **xargs):
        config_assert(len(inputs) % 3 == 0, "Error input number.")
        super(CrossEntropyOverBeamLayer, self).__init__(
            name, 'cross_entropy_over_beam', 0, inputs, **xargs)

        size_error = ("Inputs for this layer are made up of "
                      "several triples, in which the first one is scores over "
                      "all candidate paths, whose size should be equal to 1.")
        triple_count = len(inputs) / 3
        for triple_idx in range(triple_count):
            # Only the first member of each triple is checked here.
            score_layer = self.get_input_layer(triple_idx * 3)
            config_assert(score_layer.size == 1, size_error)
@config_layer('fc') @config_layer('fc')
class FCLayer(LayerBase): class FCLayer(LayerBase):
layer_type = 'fc' layer_type = 'fc'
...@@ -1788,11 +1894,19 @@ class DetectionOutputLayer(LayerBase): ...@@ -1788,11 +1894,19 @@ class DetectionOutputLayer(LayerBase):
@config_layer('data') @config_layer('data')
class DataLayer(LayerBase): class DataLayer(LayerBase):
def __init__(self, name, size, height=None, width=None, device=None): def __init__(self,
name,
size,
height=None,
width=None,
depth=None,
device=None):
super(DataLayer, self).__init__( super(DataLayer, self).__init__(
name, 'data', size, inputs=[], device=device) name, 'data', size, inputs=[], device=device)
if height and width: if height and width:
self.set_layer_height_width(height, width) self.set_layer_height_width(height, width)
if depth:
self.set_layer_depth(depth)
''' '''
...@@ -1907,7 +2021,7 @@ class ConvLayerBase(LayerBase): ...@@ -1907,7 +2021,7 @@ class ConvLayerBase(LayerBase):
def calc_parameter_size(self, conv_conf): def calc_parameter_size(self, conv_conf):
return self.config.num_filters * conv_conf.filter_channels \ return self.config.num_filters * conv_conf.filter_channels \
* (conv_conf.filter_size * conv_conf.filter_size_y) * (conv_conf.filter_size * conv_conf.filter_size_y)
@config_layer('exconv') @config_layer('exconv')
...@@ -1991,6 +2105,87 @@ class ConvTransLayer(ConvTransLayerBase): ...@@ -1991,6 +2105,87 @@ class ConvTransLayer(ConvTransLayerBase):
layer_type = 'cudnn_convt' layer_type = 'cudnn_convt'
@config_layer('conv_3d')
class Conv3DLayerBase(LayerBase):
    """
    Shared configuration logic for the 3D convolution layers.

    Relies on ``self.layer_type``, which is not set in this class;
    subclasses visible in this file define it as 'conv3d' or 'deconv3d'.
    """

    def __init__(self,
                 name,
                 inputs=[],
                 bias=True,
                 num_filters=None,
                 shared_biases=True,
                 **xargs):
        # The layer is created with size 0; the real size is set later by
        # set_cnn_layer() once the output geometry is known.
        super(Conv3DLayerBase, self).__init__(
            name, self.layer_type, 0, inputs=inputs, **xargs)

        if num_filters is not None:
            self.config.num_filters = num_filters

        # need to specify layer in config
        self.config.type = self.layer_type

        # 'deconv3d' is handled as a transposed convolution.
        trans = False
        if self.config.type == "deconv3d":
            trans = True

        if shared_biases is not None:
            self.config.shared_biases = shared_biases

        for input_index in xrange(len(self.inputs)):
            input_layer = self.get_input_layer(input_index)
            conv_conf = self.config.inputs[input_index].conv_conf
            parse_conv3d(
                self.inputs[input_index].conv,
                input_layer.name,
                conv_conf,
                num_filters,
                trans=trans
            )  # for z-axis pad:0, strid:1, filter_size:1, img_size:1
            psize = self.calc_parameter_size(conv_conf)
            self.create_input_parameter(input_index, psize)
            # A transposed convolution outputs the "image" geometry, a
            # regular convolution the "output" geometry.
            if trans:
                self.set_cnn_layer(name, conv_conf.img_size_z,
                                   conv_conf.img_size_y, conv_conf.img_size,
                                   self.config.num_filters)
            else:
                self.set_cnn_layer(name, conv_conf.output_z, conv_conf.output_y,
                                   conv_conf.output_x, self.config.num_filters)

        # Bias size: one value per filter when shared, otherwise one per
        # output element.
        psize = self.config.size
        if shared_biases:
            psize = self.config.num_filters
        self.create_bias_parameter(bias, psize, [psize, 1])

    def calc_parameter_size(self, conv_conf):
        """Return num_filters * filter_channels * filter volume (x * y * z)."""
        return self.config.num_filters * conv_conf.filter_channels \
            * (conv_conf.filter_size * conv_conf.filter_size_y \
               * conv_conf.filter_size_z)

    def set_cnn_layer(self,
                      input_layer_name,
                      depth,
                      height,
                      width,
                      channels,
                      is_print=True):
        """
        Set the layer size to depth * height * width * channels, record
        height/width/depth on the config and optionally print the geometry.
        """
        size = depth * height * width * channels
        self.set_layer_size(size)
        self.set_layer_height_width(height, width)
        self.set_layer_depth(depth)
        if is_print:
            print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" %
                  (input_layer_name, channels, depth, height, width, size))
@config_layer('conv3d')
class Conv3DLayer(Conv3DLayerBase):
    # Regular (non-transposed) 3D convolution layer.
    layer_type = 'conv3d'
@config_layer('deconv3d')
class DeConv3DLayer(Conv3DLayerBase):
    # Transposed 3D convolution layer. It carries its own class name so that
    # it no longer rebinds (and thereby hides) the 'conv3d' Conv3DLayer class
    # defined just before it.
    layer_type = 'deconv3d'
@config_layer('norm') @config_layer('norm')
class NormLayer(LayerBase): class NormLayer(LayerBase):
def __init__(self, name, inputs, **xargs): def __init__(self, name, inputs, **xargs):
...@@ -2268,6 +2463,7 @@ def define_cost(class_name, cost_type): ...@@ -2268,6 +2463,7 @@ def define_cost(class_name, cost_type):
define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy')
define_cost('CrossEntropyOverBeamCostLayer', 'cross_entropy_over_beam')
define_cost('RankingCost', 'rank-cost') define_cost('RankingCost', 'rank-cost')
define_cost('AucValidation', 'auc-validation') define_cost('AucValidation', 'auc-validation')
define_cost('PnpairValidation', 'pnpair-validation') define_cost('PnpairValidation', 'pnpair-validation')
......
文件模式从 100755 更改为 100644
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import functools import functools
import collections import collections
import inspect import inspect
...@@ -106,6 +105,8 @@ __all__ = [ ...@@ -106,6 +105,8 @@ __all__ = [
'nce_layer', 'nce_layer',
'cross_entropy_with_selfnorm', 'cross_entropy_with_selfnorm',
'cross_entropy', 'cross_entropy',
'BeamInput',
'cross_entropy_over_beam',
'multi_binary_label_cross_entropy', 'multi_binary_label_cross_entropy',
'sum_cost', 'sum_cost',
'rank_cost', 'rank_cost',
...@@ -138,6 +139,7 @@ __all__ = [ ...@@ -138,6 +139,7 @@ __all__ = [
'seq_slice_layer', 'seq_slice_layer',
'kmax_sequence_score_layer', 'kmax_sequence_score_layer',
'scale_shift_layer', 'scale_shift_layer',
'img_conv3d_layer',
] ]
...@@ -219,12 +221,16 @@ class LayerType(object): ...@@ -219,12 +221,16 @@ class LayerType(object):
CRF_DECODING_LAYER = 'crf_decoding' CRF_DECODING_LAYER = 'crf_decoding'
NCE_LAYER = 'nce' NCE_LAYER = 'nce'
CONV3D_LAYER = 'conv3d'
DECONV3D_LAYER = 'deconv3d'
RANK_COST = 'rank-cost' RANK_COST = 'rank-cost'
LAMBDA_COST = 'lambda_cost' LAMBDA_COST = 'lambda_cost'
HUBER_REGRESSION = 'huber_regression' HUBER_REGRESSION = 'huber_regression'
HUBER_CLASSIFICATION = 'huber_classification' HUBER_CLASSIFICATION = 'huber_classification'
CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY = 'multi-class-cross-entropy'
CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam'
SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy'
SUM_COST = 'sum_cost' SUM_COST = 'sum_cost'
...@@ -894,7 +900,8 @@ def mixed_layer(size=0, ...@@ -894,7 +900,8 @@ def mixed_layer(size=0,
@layer_support() @layer_support()
def data_layer(name, size, height=None, width=None, layer_attr=None): def data_layer(name, size, height=None, width=None, depth=None,
layer_attr=None):
""" """
Define DataLayer For NeuralNetwork. Define DataLayer For NeuralNetwork.
...@@ -921,15 +928,18 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): ...@@ -921,15 +928,18 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
type=LayerType.DATA, type=LayerType.DATA,
name=name, name=name,
size=size, size=size,
depth=depth,
height=height, height=height,
width=width, width=width,
**ExtraLayerAttribute.to_kwargs(layer_attr)) **ExtraLayerAttribute.to_kwargs(layer_attr))
if depth is None:
depth = 1
num_filters = None num_filters = None
if height is not None and width is not None: if height is not None and width is not None:
num_filters = size / (width * height) num_filters = size / (width * height * depth)
assert num_filters * width * height == size, \ assert num_filters * width * height*depth == size, \
"size=%s width=%s height=%s" % (size, width, height) "size=%s width=%s height=%s depth=%s" % (size, width, height, depth)
return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters)
...@@ -4071,8 +4081,12 @@ def __cost_input__(input, label, weight=None): ...@@ -4071,8 +4081,12 @@ def __cost_input__(input, label, weight=None):
""" """
inputs and parents for cost layers. inputs and parents for cost layers.
""" """
ipts = [Input(input.name), Input(label.name)] if isinstance(input, LayerOutput):
parents = [input, label] input = [input]
if isinstance(label, LayerOutput):
label = [label]
ipts = [Input(ipt.name) for ipt in (input + label)]
parents = [ipt for ipt in (input + label)]
if weight is not None: if weight is not None:
assert weight.size == 1 assert weight.size == 1
ipts.append(Input(weight.name)) ipts.append(Input(weight.name))
...@@ -5059,17 +5073,6 @@ def warp_ctc_layer(input, ...@@ -5059,17 +5073,6 @@ def warp_ctc_layer(input,
building process, PaddlePaddle will clone the source codes, build and building process, PaddlePaddle will clone the source codes, build and
install it to :code:`third_party/install/warpctc` directory. install it to :code:`third_party/install/warpctc` directory.
To use warp_ctc layer, you need to specify the path of :code:`libwarpctc.so`,
using following methods:
1. Set it in :code:`paddle.init` (python api) or :code:`paddle_init` (c api),
such as :code:`paddle.init(use_gpu=True,
warpctc_dir=your_paddle_source_dir/third_party/install/warpctc/lib)`.
2. Set environment variable LD_LIBRARY_PATH on Linux or DYLD_LIBRARY_PATH
on Mac OS. For instance, :code:`export
LD_LIBRARY_PATH=your_paddle_source_dir/third_party/install/warpctc/lib:$LD_LIBRARY_PATH`.
More details of CTC can be found by referring to `Connectionist Temporal More details of CTC can be found by referring to `Connectionist Temporal
Classification: Labelling Unsegmented Sequence Data with Recurrent Classification: Labelling Unsegmented Sequence Data with Recurrent
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/
...@@ -5777,10 +5780,10 @@ def multi_binary_label_cross_entropy(input, ...@@ -5777,10 +5780,10 @@ def multi_binary_label_cross_entropy(input,
if input.activation is None or \ if input.activation is None or \
not isinstance(input.activation, SigmoidActivation): not isinstance(input.activation, SigmoidActivation):
logger.log( logger.log(logging.WARN,
logging.WARN, ("%s is not a recommended activation for "
"%s is not recommend for multi_binary_label_cross_entropy's activation, " "multi_binary_label_cross_entropy, sigmoid is better") %
"maybe the sigmoid is better" % repr(input.activation)) repr(input.activation))
Layer( Layer(
name=name, name=name,
...@@ -5795,6 +5798,113 @@ def multi_binary_label_cross_entropy(input, ...@@ -5795,6 +5798,113 @@ def multi_binary_label_cross_entropy(input,
size=1) size=1)
class BeamInput(object):
    """
    Input triple for the cross_entropy_over_beam layer.

    A beam consists of three layer outputs: the scores over all candidates,
    the indices of the top k selected candidates, and the index of the
    ground truth (also called gold).
    """

    def __init__(self, candidate_scores, selected_candidates, gold):
        # Validate the whole triple before storing any of it.
        assert isinstance(candidate_scores, LayerOutput)
        assert candidate_scores.size == 1
        assert isinstance(selected_candidates, LayerOutput)
        assert isinstance(gold, LayerOutput)

        self.candidate_scores = candidate_scores
        self.selected_candidates = selected_candidates
        self.gold = gold
@wrap_name_default()
@layer_support()
def cross_entropy_over_beam(input, name=None):
    """
    This layer is used in learning to search models, which is to solve complex
    joint prediction problems based on learning to search through a
    problem-defined search space.

    Specifically, the learning to search process for this layer begins with
    searching a target sequence from a nested sequence. In the first search
    step, top beam size sequences with highest scores, indices of these top k
    sequences in the original nested sequence, and the ground truth (also
    called gold) altogether (a triple) make up of the first beam.

    Then, several special positions, for example, start and end positions
    that define meaningful segments are searched. In these searches, top k
    positions with highest scores are selected, and then sequence, starting
    from the selected starts till ends of the sequences (or a fixed position)
    are taken to search next.

    We call the possible top k results returned in one search the beam. This
    search process can be repeated for pre-defined turns and leads to several
    beam expansions.

    Finally, the layer cross_entropy_over_beam takes all the beam expansions
    which contain several candidate targets found along the multi-step search.
    cross_entropy_over_beam calculates cross entropy over the expanded beams
    which all the candidates in the beam as the normalized factor.

    Note that, if gold falls off the beam at search step t, then the cost is
    calculated over the beam at step t.

    This cost layer always works together with kmax_sequence_score_layer,
    sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a
    sub-search space.

    The example usage is:

    .. code-block:: python

       cost = cross_entropy_over_beam(input=[
           BeamInput(
               candidate_scores=beam1_candidates,
               selected_candidates=beam1_topk,
               gold=gold1),
           BeamInput(
               candidate_scores=beam2_candidates,
               selected_candidates=beam2_topk,
               gold=gold2),
       ])

    :param input: input beams for this layer; a single BeamInput or a list
                  of BeamInput objects.
    :type input: BeamInput|list
    :param name: name of this layer.
    :type name: basestring
    :return: LayerOutput object.
    :rtype: LayerOutput
    """

    # Accept a single beam as shorthand for a one-element list.
    if isinstance(input, BeamInput):
        input = [input]
    else:
        assert isinstance(input, list), (
            'input for cross_entropy_over_beam should be a python list '
            'of BeamInput object.')
        for ipt in input:
            assert isinstance(ipt, BeamInput), (
                'input for cross_entropy_over_beam '
                'should be a BeamInput object.')

    # Flatten every beam into its (scores, selected, gold) triple, in order.
    ipts = []
    parents = []
    for beam in input:
        parents += [beam.candidate_scores, beam.selected_candidates, beam.gold]
        ipts += [
            beam.candidate_scores.name, beam.selected_candidates.name,
            beam.gold.name
        ]

    Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts)
    # Tag the output with the type of the layer that was actually created,
    # not the unrelated multi-class cross entropy type.
    return LayerOutput(
        name, LayerType.CROSS_ENTROPY_OVER_BEAM, parents=parents, size=1)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
...@@ -6381,6 +6491,149 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): ...@@ -6381,6 +6491,149 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size) name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)
@wrap_name_default("conv3d")
@wrap_param_attr_default()
@wrap_bias_attr_default()
@wrap_act_default(act=ReluActivation())
@layer_support(DROPOUT)
def img_conv3d_layer(input,
                     filter_size,
                     num_filters,
                     name=None,
                     num_channels=None,
                     act=None,
                     groups=1,
                     stride=1,
                     padding=0,
                     bias_attr=None,
                     param_attr=None,
                     shared_biases=True,
                     layer_attr=None,
                     trans=False,
                     layer_type=None):
    """
    Build a 3D convolution layer, or a 3D transposed convolution layer when
    ``trans`` is True.

    The example usage is:

    ..  code-block:: python

        conv = img_conv3d_layer(input=data, filter_size=1,
                                num_channels=8,
                                num_filters=16, stride=1,
                                bias_attr=False,
                                act=ReluActivation())

    :param name: Layer name.
    :type name: basestring
    :param input: Layer Input.
    :type input: LayerOutput
    :param filter_size: Filter extent. Either a single int used for all three
                        dimensions, or a sequence of exactly three ints that
                        is unpacked as (filter_size, filter_size_y,
                        filter_size_z).
    :type filter_size: int|tuple|list
    :param num_filters: Number of filters; passed to the layer as
                        ``num_filters``.
    :type num_filters: int
    :param act: Activation type. The ``wrap_act_default`` decorator on this
                function is configured with ReluActivation as the default.
    :type act: BaseActivation
    :param groups: Group size of filters.
    :type groups: int
    :param stride: Stride. Either a single int used for all three dimensions,
                   or a sequence of exactly three ints unpacked as
                   (stride, stride_y, stride_z).
    :type stride: int|tuple|list
    :param padding: Padding. Either a single int used for all three
                    dimensions, or a sequence of exactly three ints unpacked
                    as (padding, padding_y, padding_z).
    :type padding: int|tuple|list
    :param bias_attr: Convolution bias attribute. None means default bias.
                      False means no bias.
    :type bias_attr: ParameterAttribute|False
    :param num_channels: Number of input channels. If None, it is taken from
                         ``input.num_filters``, which must then be set.
    :type num_channels: int
    :param param_attr: Convolution param attribute. None means default
                       attribute.
    :type param_attr: ParameterAttribute
    :param shared_biases: Whether biases are shared between filters.
    :type shared_biases: bool
    :param layer_attr: Layer Extra Attribute.
    :type layer_attr: ExtraLayerAttribute
    :param trans: True for a transposed (deconvolution) layer, False for a
                  regular convolution layer.
    :type trans: bool
    :param layer_type: Explicit layer type, default is None. If given, it has
                       to be "deconv3d" when trans=True and "conv3d" when
                       trans=False. If None, the type is chosen from
                       ``trans``.
    :type layer_type: basestring
    :return: LayerOutput object.
    :rtype: LayerOutput
    """

    def _per_axis(value):
        # Expand a scalar to three identical components, or unpack a
        # three-element sequence into its components.
        if isinstance(value, collections.Sequence):
            assert len(value) == 3
            first, second, third = value
            return first, second, third
        return value, value, value

    if num_channels is None:
        assert input.num_filters is not None
        num_channels = input.num_filters

    filter_size, filter_size_y, filter_size_z = _per_axis(filter_size)
    stride, stride_y, stride_z = _per_axis(stride)
    padding, padding_y, padding_z = _per_axis(padding)

    if param_attr.attr.get('initial_smart'):
        # special initial for conv layers.
        # NOTE(review): the fan-in uses filter_size**2 only and ignores
        # filter_size_y / filter_size_z; kept as-is so that existing
        # initial_std values do not change -- confirm whether intended.
        init_w = (2.0 / (filter_size**2 * num_channels))**0.5
        param_attr.attr["initial_mean"] = 0.0
        param_attr.attr["initial_std"] = init_w
        param_attr.attr["initial_strategy"] = 0
        param_attr.attr["initial_smart"] = False

    if layer_type:
        # An explicit layer_type must agree with `trans` in both directions;
        # otherwise a mismatched type would silently build the wrong layer.
        expected_type = "deconv3d" if trans else "conv3d"
        assert layer_type == expected_type, (
            'layer_type for img_conv3d_layer should be "%s" when trans=%s' %
            (expected_type, trans))
        lt = layer_type
    else:
        lt = LayerType.DECONV3D_LAYER if trans else LayerType.CONV3D_LAYER

    l = Layer(
        name=name,
        inputs=Input(
            input.name,
            conv=Conv3D(
                filter_size=filter_size,
                padding=padding,
                stride=stride,
                channels=num_channels,
                groups=groups,
                filter_size_y=filter_size_y,
                padding_y=padding_y,
                stride_y=stride_y,
                filter_size_z=filter_size_z,
                padding_z=padding_z,
                stride_z=stride_z),
            **param_attr.attr),
        active_type=act.name,
        num_filters=num_filters,
        bias=ParamAttr.to_bias(bias_attr),
        shared_biases=shared_biases,
        type=lt,
        **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(
        name,
        lt,
        parents=[input],
        activation=act,
        num_filters=num_filters,
        size=l.config.size)
@wrap_name_default("scale_shift") @wrap_name_default("scale_shift")
@wrap_param_attr_default() @wrap_param_attr_default()
@wrap_bias_attr_default() @wrap_bias_attr_default()
......
文件模式从 100755 更改为 100644
...@@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer ...@@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer
test_seq_slice_layer) test_seq_slice_layer test_cross_entropy_over_beam test_conv3d_layer test_deconv3d_layer)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "data"
type: "data"
size: 36288
active_type: ""
height: 48
width: 42
depth: 6
}
layers {
name: "conv3d_1"
type: "conv3d"
size: 24192
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "_conv3d_1.w0"
conv_conf {
filter_size: 3
channels: 3
stride: 2
padding: 1
groups: 1
filter_channels: 3
output_x: 21
img_size: 42
caffe_mode: true
filter_size_y: 3
padding_y: 1
stride_y: 2
output_y: 24
img_size_y: 48
filter_size_z: 3
padding_z: 1
stride_z: 2
output_z: 3
img_size_z: 6
}
}
bias_parameter_name: "_conv3d_1.wbias"
num_filters: 16
shared_biases: true
height: 24
width: 21
depth: 3
}
layers {
name: "conv3d_2"
type: "conv3d"
size: 24192
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "_conv3d_2.w0"
conv_conf {
filter_size: 3
channels: 3
stride: 2
padding: 1
groups: 1
filter_channels: 3
output_x: 21
img_size: 42
caffe_mode: true
filter_size_y: 3
padding_y: 1
stride_y: 2
output_y: 24
img_size_y: 48
filter_size_z: 3
padding_z: 1
stride_z: 2
output_z: 3
img_size_z: 6
}
}
bias_parameter_name: "_conv3d_2.wbias"
num_filters: 16
shared_biases: true
height: 24
width: 21
depth: 3
}
parameters {
name: "_conv3d_1.w0"
size: 1296
initial_mean: 0.0
initial_std: 0.272165526976
initial_strategy: 0
initial_smart: false
}
parameters {
name: "_conv3d_1.wbias"
size: 16
initial_mean: 0.0
initial_std: 0.0
dims: 16
dims: 1
initial_strategy: 0
initial_smart: false
}
parameters {
name: "_conv3d_2.w0"
size: 1296
initial_mean: 0.0
initial_std: 0.272165526976
initial_strategy: 0
initial_smart: false
}
parameters {
name: "_conv3d_2.wbias"
size: 16
initial_mean: 0.0
initial_std: 0.0
dims: 16
dims: 1
initial_strategy: 0
initial_smart: false
}
input_layer_names: "data"
output_layer_names: "conv3d_2"
sub_models {
name: "root"
layer_names: "data"
layer_names: "conv3d_1"
layer_names: "conv3d_2"
input_layer_names: "data"
output_layer_names: "conv3d_2"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "sentence_states"
type: "data"
size: 32
active_type: ""
}
layers {
name: "sentence_scores"
type: "data"
size: 1
active_type: ""
}
layers {
name: "__kmax_sequence_score_layer_0__"
type: "kmax_seq_score"
active_type: ""
inputs {
input_layer_name: "sentence_scores"
}
beam_size: 5
}
layers {
name: "__sub_nested_seq_layer_0__"
type: "sub_nested_seq"
size: 32
active_type: ""
inputs {
input_layer_name: "sentence_states"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_0__"
}
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 1
active_type: ""
inputs {
input_layer_name: "__sub_nested_seq_layer_0__"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_1__"
type: "kmax_seq_score"
active_type: ""
inputs {
input_layer_name: "sentence_scores"
}
beam_size: 5
}
layers {
name: "__seq_slice_layer_0__"
type: "seq_slice"
size: 32
active_type: ""
inputs {
input_layer_name: "__sub_nested_seq_layer_0__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_1__"
}
select_first: true
}
layers {
name: "__fc_layer_1__"
type: "fc"
size: 1
active_type: ""
inputs {
input_layer_name: "__seq_slice_layer_0__"
input_parameter_name: "___fc_layer_1__.w0"
}
bias_parameter_name: "___fc_layer_1__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_2__"
type: "kmax_seq_score"
active_type: ""
inputs {
input_layer_name: "__fc_layer_1__"
}
beam_size: 5
}
layers {
name: "sentences_ids"
type: "data"
size: 1
active_type: ""
}
layers {
name: "start_ids"
type: "data"
size: 1
active_type: ""
}
layers {
name: "end_ids"
type: "data"
size: 1
active_type: ""
}
layers {
name: "__cross_entropy_over_beam_0__"
type: "cross_entropy_over_beam"
active_type: ""
inputs {
input_layer_name: "sentence_scores"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_0__"
}
inputs {
input_layer_name: "sentences_ids"
}
inputs {
input_layer_name: "__fc_layer_0__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_1__"
}
inputs {
input_layer_name: "start_ids"
}
inputs {
input_layer_name: "__fc_layer_1__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_2__"
}
inputs {
input_layer_name: "end_ids"
}
}
parameters {
name: "___fc_layer_0__.w0"
size: 32
initial_mean: 0.0
initial_std: 0.176776695297
dims: 32
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__.wbias"
size: 1
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___fc_layer_1__.w0"
size: 32
initial_mean: 0.0
initial_std: 0.176776695297
dims: 32
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_1__.wbias"
size: 1
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: false
}
input_layer_names: "sentence_scores"
input_layer_names: "sentences_ids"
input_layer_names: "sentence_states"
input_layer_names: "start_ids"
input_layer_names: "end_ids"
output_layer_names: "__cross_entropy_over_beam_0__"
sub_models {
name: "root"
layer_names: "sentence_states"
layer_names: "sentence_scores"
layer_names: "__kmax_sequence_score_layer_0__"
layer_names: "__sub_nested_seq_layer_0__"
layer_names: "__fc_layer_0__"
layer_names: "__kmax_sequence_score_layer_1__"
layer_names: "__seq_slice_layer_0__"
layer_names: "__fc_layer_1__"
layer_names: "__kmax_sequence_score_layer_2__"
layer_names: "sentences_ids"
layer_names: "start_ids"
layer_names: "end_ids"
layer_names: "__cross_entropy_over_beam_0__"
input_layer_names: "sentence_scores"
input_layer_names: "sentences_ids"
input_layer_names: "sentence_states"
input_layer_names: "start_ids"
input_layer_names: "end_ids"
output_layer_names: "__cross_entropy_over_beam_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 36288
active_type: ""
height: 48
width: 42
depth: 6
}
layers {
name: "deconv3d_1"
type: "deconv3d"
size: 1387760
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "_deconv3d_1.w0"
conv_conf {
filter_size: 3
channels: 3
stride: 2
padding: 1
groups: 1
filter_channels: 16
output_x: 42
img_size: 83
caffe_mode: true
filter_size_y: 3
padding_y: 1
stride_y: 2
output_y: 48
img_size_y: 95
filter_size_z: 3
padding_z: 1
stride_z: 2
output_z: 6
img_size_z: 11
}
}
bias_parameter_name: "_deconv3d_1.wbias"
num_filters: 16
shared_biases: true
height: 95
width: 83
depth: 11
}
layers {
name: "deconv3d_2"
type: "deconv3d"
size: 1387760
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "_deconv3d_2.w0"
conv_conf {
filter_size: 3
channels: 3
stride: 2
padding: 1
groups: 1
filter_channels: 16
output_x: 42
img_size: 83
caffe_mode: true
filter_size_y: 3
padding_y: 1
stride_y: 2
output_y: 48
img_size_y: 95
filter_size_z: 3
padding_z: 1
stride_z: 2
output_z: 6
img_size_z: 11
}
}
bias_parameter_name: "_deconv3d_2.wbias"
num_filters: 16
shared_biases: true
height: 95
width: 83
depth: 11
}
parameters {
name: "_deconv3d_1.w0"
size: 6912
initial_mean: 0.0
initial_std: 0.272165526976
initial_strategy: 0
initial_smart: false
}
parameters {
name: "_deconv3d_1.wbias"
size: 16
initial_mean: 0.0
initial_std: 0.0
dims: 16
dims: 1
initial_strategy: 0
initial_smart: false
}
parameters {
name: "_deconv3d_2.w0"
size: 6912
initial_mean: 0.0
initial_std: 0.272165526976
initial_strategy: 0
initial_smart: false
}
parameters {
name: "_deconv3d_2.wbias"
size: 16
initial_mean: 0.0
initial_std: 0.0
dims: 16
dims: 1
initial_strategy: 0
initial_smart: false
}
input_layer_names: "data"
output_layer_names: "deconv3d_2"
sub_models {
name: "root"
layer_names: "data"
layer_names: "deconv3d_1"
layer_names: "deconv3d_2"
input_layer_names: "data"
output_layer_names: "deconv3d_2"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
# Parse-only test configuration: two 3D convolution layers are built on the
# same data layer, one with scalar geometry arguments and one with
# per-dimension lists.
settings(learning_rate=1e-5, batch_size=1000)

num_channels = 3
groups = 1

# Filter / stride / padding extents. The *_y and *_z values are only used by
# the list-style call further down.
filter_size, filter_size_y, filter_size_z = 3, 3, 3
stride, stride_y, stride_z = 2, 2, 2
padding, padding_y, padding_z = 1, 1, 1

data = data_layer(
    name='data',
    size=12096 * num_channels,
    depth=6,
    height=48,
    width=42)

# Layer 1: every geometry argument is a single int.
conv3d_1 = img_conv3d_layer(
    name='conv3d_1',
    input=data,
    layer_type="conv3d",
    trans=False,
    num_channels=num_channels,
    num_filters=16,
    groups=groups,
    filter_size=filter_size,
    stride=stride,
    padding=padding,
    shared_biases=True,
    bias_attr=True,
    act=LinearActivation())

# Layer 2: every geometry argument is a three-element list.
conv3d_2 = img_conv3d_layer(
    name='conv3d_2',
    input=data,
    layer_type="conv3d",
    trans=False,
    num_channels=num_channels,
    num_filters=16,
    groups=groups,
    filter_size=[filter_size, filter_size_y, filter_size_z],
    stride=[stride, stride_y, stride_z],
    padding=[padding, padding_y, padding_z],
    shared_biases=True,
    bias_attr=True,
    act=LinearActivation())

outputs(conv3d_2)
#!/usr/bin/env python
#coding=utf-8
from paddle.trainer_config_helpers import *
# Beam width shared by every kmax_sequence_score_layer in this config.
beam_size = 5

# ---- Beam expansion 1: score sentences and keep the top-k. ----
sentence_states = data_layer(name="sentence_states", size=32)
sentence_scores = data_layer(name="sentence_scores", size=1)
topk_sentence_ids = kmax_sequence_score_layer(
    beam_size=beam_size, input=sentence_scores)

# ---- Beam expansion 2: score start positions inside selected sentences. ----
selected_sentences = sub_nested_seq_layer(
    selected_indices=topk_sentence_ids, input=sentence_states)
start_pos_scores = fc_layer(
    input=selected_sentences, act=LinearActivation(), size=1)
# NOTE(review): this top-k is taken over sentence_scores, not
# start_pos_scores -- looks unintentional; confirm before changing, since the
# expected parse output records this input.
topk_start_pos_ids = kmax_sequence_score_layer(
    beam_size=beam_size, input=sentence_scores)

# ---- Beam expansion 3: score end positions in the sliced spans. ----
start_spans = seq_slice_layer(
    input=selected_sentences, ends=None, starts=topk_start_pos_ids)
end_pos_scores = fc_layer(input=start_spans, act=LinearActivation(), size=1)
topk_end_pos_ids = kmax_sequence_score_layer(
    beam_size=beam_size, input=end_pos_scores)

# ---- Gold labels, one data layer per expansion. ----
sentence_idx = data_layer(name="sentences_ids", size=1)
start_idx = data_layer(name="start_ids", size=1)
end_idx = data_layer(name="end_ids", size=1)

# ---- Cost: one BeamInput per expansion, in expansion order. ----
sentence_beam = BeamInput(
    candidate_scores=sentence_scores,
    selected_candidates=topk_sentence_ids,
    gold=sentence_idx)
start_beam = BeamInput(
    candidate_scores=start_pos_scores,
    selected_candidates=topk_start_pos_ids,
    gold=start_idx)
end_beam = BeamInput(
    candidate_scores=end_pos_scores,
    selected_candidates=topk_end_pos_ids,
    gold=end_idx)

cost = cross_entropy_over_beam(input=[sentence_beam, start_beam, end_beam])

outputs(cost)
from paddle.trainer_config_helpers import *
# Parse-only test configuration: two 3D transposed-convolution layers are
# built on the same data layer, one with scalar geometry arguments and one
# with per-dimension lists.
settings(learning_rate=1e-5, batch_size=1000)

num_channels = 3
groups = 1

# Filter / stride / padding extents. The *_y and *_z values are only used by
# the list-style call further down.
filter_size, filter_size_y, filter_size_z = 3, 3, 3
stride, stride_y, stride_z = 2, 2, 2
padding, padding_y, padding_z = 1, 1, 1

data = data_layer(
    name='data',
    size=12096 * num_channels,
    depth=6,
    height=48,
    width=42)

# Layer 1: every geometry argument is a single int.
deconv3d_1 = img_conv3d_layer(
    name='deconv3d_1',
    input=data,
    layer_type="deconv3d",
    trans=True,
    num_channels=num_channels,
    num_filters=16,
    groups=groups,
    filter_size=filter_size,
    stride=stride,
    padding=padding,
    shared_biases=True,
    bias_attr=True,
    act=LinearActivation())

# Layer 2: every geometry argument is a three-element list.
deconv3d_2 = img_conv3d_layer(
    name='deconv3d_2',
    input=data,
    layer_type="deconv3d",
    trans=True,
    num_channels=num_channels,
    num_filters=16,
    groups=groups,
    filter_size=[filter_size, filter_size_y, filter_size_z],
    stride=[stride, stride_y, stride_z],
    padding=[padding, padding_y, padding_z],
    shared_biases=True,
    bias_attr=True,
    act=LinearActivation())

outputs(deconv3d_2)
...@@ -17,3 +17,4 @@ from paddle.trainer.config_parser import parse_config_and_serialize ...@@ -17,3 +17,4 @@ from paddle.trainer.config_parser import parse_config_and_serialize
if __name__ == '__main__': if __name__ == '__main__':
parse_config_and_serialize( parse_config_and_serialize(
'trainer_config_helpers/tests/layers_test_config.py', '') 'trainer_config_helpers/tests/layers_test_config.py', '')
# layers_test_config.py
...@@ -70,7 +70,7 @@ class Inference(object): ...@@ -70,7 +70,7 @@ class Inference(object):
item = [each_result[each_field] for each_field in field] item = [each_result[each_field] for each_field in field]
yield item yield item
def infer(self, input, field='value', **kwargs): def infer(self, input, field='value', flatten_result=True, **kwargs):
""" """
Infer a data by model. Infer a data by model.
:param input: input data batch. Should be python iterable object. :param input: input data batch. Should be python iterable object.
...@@ -83,7 +83,10 @@ class Inference(object): ...@@ -83,7 +83,10 @@ class Inference(object):
retv = [[] for i in xrange(len(result))] retv = [[] for i in xrange(len(result))]
for i, item in enumerate(result): for i, item in enumerate(result):
retv[i].append(item) retv[i].append(item)
retv = [numpy.concatenate(out) for out in retv]
if flatten_result:
retv = [numpy.concatenate(out) for out in retv]
if len(retv) == 1: if len(retv) == 1:
return retv[0] return retv[0]
else: else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册