提交 b9dfe8e7 编写于 作者: H Haonan 提交者: GitHub

Merge pull request #1231 from yu239/rotate_and_flip

One bug fix and two new features
...@@ -267,4 +267,16 @@ extern void hl_matrix_collect_shared_bias(real* B_d, ...@@ -267,4 +267,16 @@ extern void hl_matrix_collect_shared_bias(real* B_d,
const int dimN, const int dimN,
real scale); real scale);
/**
 * @brief Rotate a matrix by 90 degrees.
 *
 * Both matrices are indexed as dense row-major arrays (element (r, c) of an
 * R x C matrix lives at index r * C + c). The element mapping is:
 *   - clockWise == true:   matRot(j, i) = mat(dimM - 1 - i, j)
 *   - clockWise == false:  matRot(j, i) = mat(i, dimN - 1 - j)
 * for 0 <= i < dimM and 0 <= j < dimN.
 *
 * @param[in]  mat        input matrix (M x N).
 * @param[out] matRot     output matrix (N x M).
 * @param[in]  dimM       input matrix height.
 * @param[in]  dimN       input matrix width.
 * @param[in]  clockWise  rotation direction: true rotates clock-wise,
 *                        false rotates anti clock-wise.
 */
extern void hl_matrix_rotate(
real* mat, real* matRot, int dimM, int dimN, bool clockWise);
#endif /* HL_MATRIX_H_ */ #endif /* HL_MATRIX_H_ */
...@@ -106,4 +106,8 @@ inline void hl_matrix_collect_shared_bias(real* B_d, ...@@ -106,4 +106,8 @@ inline void hl_matrix_collect_shared_bias(real* B_d,
const int dimM, const int dimM,
const int dimN, const int dimN,
real scale) {} real scale) {}
/// Stub of hl_matrix_rotate with the same parameter list and an empty body:
/// calling it leaves both matrices untouched.
/// NOTE(review): presumably selected when the GPU implementation is not
/// compiled in -- confirm against the build configuration.
inline void hl_matrix_rotate(
real* mat, real* matRot, int dimM, int dimN, bool clockWise) {}
#endif // HL_MATRIX_STUB_H_ #endif // HL_MATRIX_STUB_H_
...@@ -840,3 +840,28 @@ void hl_matrix_collect_shared_bias(real* B_d, ...@@ -840,3 +840,28 @@ void hl_matrix_collect_shared_bias(real* B_d,
(B_d, A_d, channel, dimM, dimN, dim, limit, scale); (B_d, A_d, channel, dimM, dimN, dim, limit, scale);
CHECK_SYNC("hl_matrix_collect_shared_bias failed"); CHECK_SYNC("hl_matrix_collect_shared_bias failed");
} }
/**
 * Kernel that rotates a (dimM x dimN) matrix by 90 degrees into a
 * (dimN x dimM) matrix. Each thread handles at most one input element,
 * identified by its flattened index; threads past the end do nothing.
 *
 *   clockWise:       matRot(col, row) = mat(dimM - 1 - row, col)
 *   anti clock-wise: matRot(col, row) = mat(row, dimN - 1 - col)
 */
__global__ void keMatrixRotate(real* mat, real* matRot,
                               int dimM, int dimN, bool clockWise) {
  const int linear = blockIdx.x * blockDim.x + threadIdx.x;
  if (linear >= dimM * dimN) {
    return;
  }

  const int row = linear / dimN;
  const int col = linear % dimN;

  // Pick the source element according to the rotation direction; the
  // destination slot is the same in both cases.
  const int src = clockWise ? (dimM - row - 1) * dimN + col
                            : row * dimN + (dimN - col - 1);
  matRot[col * dimM + row] = mat[src];
}
/**
 * Rotate the (dimM x dimN) matrix `mat` by 90 degrees into the
 * (dimN x dimM) matrix `matRot` by launching keMatrixRotate with one thread
 * per element (512 threads per block) on STREAM_DEFAULT.
 *
 * @param mat        input matrix data, must not be NULL.
 * @param matRot     output matrix data, must not be NULL.
 * @param dimM       input matrix height.
 * @param dimN       input matrix width.
 * @param clockWise  true for clock-wise, false for anti clock-wise rotation.
 */
void hl_matrix_rotate(real *mat, real* matRot,
                      int dimM, int dimN, bool clockWise) {
  CHECK_NOTNULL(mat);
  CHECK_NOTNULL(matRot);

  // An empty matrix has nothing to rotate. Without this guard `blocks` would
  // be 0, and a kernel launch with a zero-sized grid is rejected as an
  // invalid configuration, whereas the CPU implementation simply does nothing
  // for the same input.
  if (dimM == 0 || dimN == 0) {
    return;
  }

  const int threads = 512;
  const int blocks = DIVUP(dimM * dimN, threads);
  keMatrixRotate<<< blocks, threads, 0, STREAM_DEFAULT >>>
      (mat, matRot, dimM, dimN, clockWise);
  CHECK_SYNC("hl_matrix_rotate failed");
}
...@@ -95,6 +95,9 @@ void FeatureMapExpandLayer::forward(PassType passType) { ...@@ -95,6 +95,9 @@ void FeatureMapExpandLayer::forward(PassType passType) {
void FeatureMapExpandLayer::backward(const UpdateCallback& callback) { void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
MatrixPtr inGrad = getInputGrad(0); MatrixPtr inGrad = getInputGrad(0);
if (NULL == inGrad) {
return;
}
MatrixPtr outGrad = getOutputGrad(); MatrixPtr outGrad = getOutputGrad();
size_t batchSize = getInput(0).getBatchSize(); size_t batchSize = getInput(0).getBatchSize();
int imgSize = inGrad->getWidth(); int imgSize = inGrad->getWidth();
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "RotateLayer.h"
namespace paddle {
REGISTER_LAYER(rotate, RotateLayer);
/**
 * Initializes the layer: runs the base-class initialization, requires
 * exactly one input layer, and caches the per-channel height and width
 * taken from the layer config (both must be positive).
 *
 * @return true on success; false if the base-class initialization fails.
 */
bool RotateLayer::init(const LayerMap& layerMap,
                       const ParameterMap& parameterMap) {
  // Propagate a base-class failure instead of silently discarding it.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }

  CHECK_EQ(inputLayers_.size(), 1UL);
  height_ = config_.height();
  width_ = config_.width();
  CHECK_GT(height_, 0);
  CHECK_GT(width_, 0);
  return true;
}
/**
 * Forward pass: treats every input row as `channels_` planes of
 * (height_ x width_) values and writes each plane, rotated clock-wise,
 * to the same offset of the output as a (width_ x height_) plane.
 */
void RotateLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr input = getInputValue(0);
  batchSize_ = input->getHeight();
  size_ = input->getWidth();

  // The sample width must hold a whole number of (height_ x width_) planes.
  CHECK_GE(size_, height_ * width_);
  CHECK_EQ(size_ % (height_ * width_), 0)
      << "total size_ is not dividable by (height_ * width_), i.e., "
      << "channel number should be an integer";
  channels_ = size_ / (height_ * width_);

  resizeOutput(batchSize_, size_);
  MatrixPtr outV = getOutputValue();

  for (int sample = 0; sample < batchSize_; ++sample) {
    for (int channel = 0; channel < channels_; ++channel) {
      // Input and output planes share the same element offset.
      const int offset = sample * size_ + channel * height_ * width_;

      // Non-owning views over one plane of the input and of the output.
      MatrixPtr srcPlane = Matrix::create(
          input->getData() + offset, height_, width_, false, useGpu_);
      MatrixPtr dstPlane = Matrix::create(
          outV->getData() + offset, width_, height_, false, useGpu_);

      // Destination memory already exists, so no allocation is requested.
      srcPlane->rotate(dstPlane, false, true /* clock-wise */);
    }
  }

  if (getInputGrad(0)) {
    zeroGrad();
  }
}
void RotateLayer::backward(const UpdateCallback& callback) {
(void)callback;
MatrixPtr outputGrad = getOutputGrad();
if (outputGrad == NULL) {
return;
}
// the grad should be rotated in the reverse direction
MatrixPtr preGrad = getInputGrad(0);
for (int b = 0; b < batchSize_; b++) { // for each input feat map
for (int c = 0; c < channels_; c++) { // for each feat channel
MatrixPtr inputSampleGrad =
Matrix::create(preGrad->getData() + b * size_ + c * height_ * width_,
height_,
width_,
false,
useGpu_);
MatrixPtr outputSampleGrad = Matrix::create(
outputGrad->getData() + b * size_ + c * height_ * width_,
width_,
height_,
false,
useGpu_);
MatrixPtr tmpGrad = nullptr;
outputSampleGrad->rotate(tmpGrad, true, false /* anti clock-wise */);
inputSampleGrad->add(*tmpGrad);
}
}
}
} // namespace paddle
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* A layer for rotating a multi-channel feature map (M x N x C) in the spatial
* domain
* The rotation is 90 degrees in clock-wise for each channel
* \f[
* y(j,i,:) = x(M-i-1,j,:)
* \f]
* where \f$x\f$ is (M x N x C) input, and \f$y\f$ is (N x M x C) output.
*
* The config file api is rotate_layer
*
*/
class RotateLayer : public Layer {
public:
  explicit RotateLayer(const LayerConfig& config) : Layer(config) {}

  /// Reads the per-channel height and width from the layer config.
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /// Rotates every channel of every sample clock-wise.
  void forward(PassType passType);

  /// Rotates the output gradient anti clock-wise and accumulates it into the
  /// input gradient.
  void backward(const UpdateCallback& callback = nullptr);

private:
  // All members start at zero so that backward() iterates over nothing,
  // instead of reading indeterminate values, if it runs before forward()
  // has filled them in.
  int batchSize_ = 0;  ///< number of rows of the input, set in forward()
  int size_ = 0;       ///< width of the input (values per sample), set in forward()
  int height_ = 0;     ///< per-channel height, from the layer config
  int width_ = 0;      ///< per-channel width, from the layer config
  int channels_ = 0;   ///< size_ / (height_ * width_), set in forward()
};
} // namespace paddle
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
/** /**
* A layer for transposition. * A layer for transposing a minibatch matrix.
* \f[ * \f[
y = x^\mathrm{T} y = x^\mathrm{T}
* \f] * \f]
......
...@@ -1316,6 +1316,25 @@ TEST(Layer, ResizeLayer) { ...@@ -1316,6 +1316,25 @@ TEST(Layer, ResizeLayer) {
} }
} }
TEST(Layer, RotateLayer) {
  // Input geometry: kChannels planes of kHeight x kWidth values per sample.
  const int kHeight = 8;
  const int kWidth = 4;
  const int kChannels = 2;
  const int kInputSize = kHeight * kWidth * kChannels;

  TestConfig config;
  config.biasSize = 0;

  // Describe the rotate layer itself.
  config.layerConfig.set_type("rotate");
  config.layerConfig.set_size(kInputSize);
  config.layerConfig.set_height(kHeight);
  config.layerConfig.set_width(kWidth);

  // A single dense data input feeding the layer.
  config.inputDefs.push_back({INPUT_DATA, "layer_0", kInputSize, 0});
  config.layerConfig.add_inputs();

  // Run testLayerGrad once with the last flag off and once with it on.
  for (bool onGpu : {false, true}) {
    testLayerGrad(config, "rotate", 100, false, onGpu);
  }
}
TEST(Layer, NCELayer) { TEST(Layer, NCELayer) {
TestConfig config; TestConfig config;
size_t numClasses = 4; size_t numClasses = 4;
......
...@@ -372,7 +372,7 @@ MatrixPtr CpuSparseMatrix::subMatrix(size_t startRow, size_t numRows) { ...@@ -372,7 +372,7 @@ MatrixPtr CpuSparseMatrix::subMatrix(size_t startRow, size_t numRows) {
} }
/* mem MUST be alloced outside (memAlloc=false) */ /* mem MUST be alloced outside (memAlloc=false) */
void CpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { void CpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
CHECK(!memAlloc); CHECK(!memAlloc);
CpuSparseMatrix* mat = dynamic_cast<CpuSparseMatrix*>(matTrans.get()); CpuSparseMatrix* mat = dynamic_cast<CpuSparseMatrix*>(matTrans.get());
if (format_ == SPARSE_CSR) { if (format_ == SPARSE_CSR) {
......
...@@ -201,7 +201,7 @@ public: ...@@ -201,7 +201,7 @@ public:
void zeroMem(); void zeroMem();
/// mem MUST be alloced outside (memAlloc=false) /// mem MUST be alloced outside (memAlloc=false)
void transpose(MatrixPtr matTrans, bool memAlloc); void transpose(MatrixPtr& matTrans, bool memAlloc);
void mul(const Matrix& A, const Matrix& B, real alpha, real beta); void mul(const Matrix& A, const Matrix& B, real alpha, real beta);
......
...@@ -274,6 +274,18 @@ real GpuMatrix::getSum() { ...@@ -274,6 +274,18 @@ real GpuMatrix::getSum() {
return sum; return sum;
} }
/// Returns the result of GpuVector::getMin() over all height_ * width_
/// elements of this matrix; the matrix must be contiguous.
real GpuMatrix::getMin() {
  CHECK(isContiguous());
  // View the matrix storage as one flat vector.
  GpuVector flat(height_ * width_, data_);
  return flat.getMin();
}
/// Returns the result of GpuVector::getMax() over all height_ * width_
/// elements of this matrix; the matrix must be contiguous.
real GpuMatrix::getMax() {
  CHECK(isContiguous());
  // View the matrix storage as one flat vector.
  GpuVector flat(height_ * width_, data_);
  return flat.getMax();
}
void GpuMatrix::accumulateColSum(Matrix& src) { void GpuMatrix::accumulateColSum(Matrix& src) {
CHECK_EQ(getWidth(), src.getWidth()); CHECK_EQ(getWidth(), src.getWidth());
CHECK_EQ(getHeight(), (size_t)1); CHECK_EQ(getHeight(), (size_t)1);
...@@ -371,11 +383,13 @@ MatrixPtr GpuMatrix::getTranspose() { ...@@ -371,11 +383,13 @@ MatrixPtr GpuMatrix::getTranspose() {
} }
} }
void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
if (memAlloc) { if (memAlloc) {
matTrans = std::make_shared<GpuMatrix>(width_, height_); matTrans = std::make_shared<GpuMatrix>(width_, height_);
} else { } else {
CHECK(matTrans != NULL); CHECK(matTrans != NULL);
CHECK_EQ(matTrans->getHeight(), width_);
CHECK_EQ(matTrans->getWidth(), height_);
} }
real* dataTrans = matTrans->getData(); real* dataTrans = matTrans->getData();
real* data = getData(); real* data = getData();
...@@ -385,13 +399,27 @@ void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { ...@@ -385,13 +399,27 @@ void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc); hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc);
} }
/**
 * Rotates this (height_ x width_) matrix by 90 degrees into matRot.
 *
 * @param matRot    destination; replaced by a new (width_ x height_)
 *                  GpuMatrix when memAlloc is true, otherwise it must already
 *                  be non-NULL with exactly that shape.
 * @param memAlloc  whether to allocate the destination here.
 * @param clockWise true for clock-wise, false for anti clock-wise.
 */
void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
  if (!memAlloc) {
    // Caller-provided destination: validate it before writing into it.
    CHECK(matRot != NULL);
    CHECK_EQ(matRot->getHeight(), width_);
    CHECK_EQ(matRot->getWidth(), height_);
  } else {
    matRot = std::make_shared<GpuMatrix>(width_, height_);
  }

  real* dst = matRot->getData();
  real* src = getData();
  hl_matrix_rotate(src, dst, height_, width_, clockWise);
}
MatrixPtr GpuMatrix::getInverse() { MatrixPtr GpuMatrix::getInverse() {
MatrixPtr matInv; MatrixPtr matInv;
inverse(matInv, true); inverse(matInv, true);
return matInv; return matInv;
} }
void GpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) { void GpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
CHECK_EQ(height_, width_); CHECK_EQ(height_, width_);
if (memAlloc) { if (memAlloc) {
...@@ -1690,11 +1718,13 @@ MatrixPtr CpuMatrix::getTranspose() { ...@@ -1690,11 +1718,13 @@ MatrixPtr CpuMatrix::getTranspose() {
} }
} }
void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
if (memAlloc) { if (memAlloc) {
matTrans = std::make_shared<CpuMatrix>(width_, height_); matTrans = std::make_shared<CpuMatrix>(width_, height_);
} else { } else {
CHECK(matTrans != NULL); CHECK(matTrans != NULL);
CHECK_EQ(matTrans->getHeight(), width_);
CHECK_EQ(matTrans->getWidth(), height_);
} }
real* dataTrans = matTrans->getData(); real* dataTrans = matTrans->getData();
real* data = getData(); real* data = getData();
...@@ -1708,13 +1738,35 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { ...@@ -1708,13 +1738,35 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
} }
} }
/**
 * Rotates this (height_ x width_) matrix by 90 degrees into matRot.
 *
 *   clock-wise:       matRot(c, r) = this(height_ - 1 - r, c)
 *   anti clock-wise:  matRot(c, r) = this(r, width_ - 1 - c)
 *
 * @param matRot    destination; replaced by a new (width_ x height_)
 *                  CpuMatrix when memAlloc is true, otherwise it must already
 *                  be non-NULL with exactly that shape.
 * @param memAlloc  whether to allocate the destination here.
 * @param clockWise true for clock-wise, false for anti clock-wise.
 */
void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
  if (!memAlloc) {
    // Caller-provided destination: validate it before writing into it.
    CHECK(matRot != NULL);
    CHECK_EQ(matRot->getHeight(), width_);
    CHECK_EQ(matRot->getWidth(), height_);
  } else {
    matRot = std::make_shared<CpuMatrix>(width_, height_);
  }

  real* dst = matRot->getData();
  real* src = getData();

  // The direction is fixed for the whole call, so branch once and run a
  // dedicated loop nest for each case.
  if (clockWise) {
    for (size_t r = 0; r < height_; ++r) {
      for (size_t c = 0; c < width_; ++c) {
        dst[c * height_ + r] = src[(height_ - r - 1) * width_ + c];
      }
    }
  } else {
    for (size_t r = 0; r < height_; ++r) {
      for (size_t c = 0; c < width_; ++c) {
        dst[c * height_ + r] = src[r * width_ + (width_ - c - 1)];
      }
    }
  }
}
MatrixPtr CpuMatrix::getInverse() { MatrixPtr CpuMatrix::getInverse() {
MatrixPtr matInv; MatrixPtr matInv;
inverse(matInv, true); inverse(matInv, true);
return matInv; return matInv;
} }
void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) { void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
CHECK_EQ(height_, width_); CHECK_EQ(height_, width_);
if (memAlloc) { if (memAlloc) {
......
...@@ -372,7 +372,27 @@ public: ...@@ -372,7 +372,27 @@ public:
* allocate matTrans' memory outside, then set memAlloc as false; * allocate matTrans' memory outside, then set memAlloc as false;
* else set as true. * else set as true.
*/ */
virtual void transpose(MatrixPtr matTrans, bool memAlloc) { virtual void transpose(MatrixPtr& matTrans, bool memAlloc) {
LOG(FATAL) << "Not implemented";
}
/**
* @brief rotate 90 degrees in clock-wise if clockWise=true;
* otherwise rotate in anti clock-wise
* clock-wise:
* \f[
* y(j,i) = x(M-i-1,j)
* \f]
* anti clock-wise:
* \f[
* y(j,i) = x(i, N-1-j)
* \f]
* where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output.
*
* allocate matRot' memory outside, then set memAlloc as false;
* else set as true.
*/
virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
} }
...@@ -387,7 +407,7 @@ public: ...@@ -387,7 +407,7 @@ public:
* if allocate matInv's memory outside, then set memAlloc as false; * if allocate matInv's memory outside, then set memAlloc as false;
* else set as true. * else set as true.
*/ */
virtual void inverse(MatrixPtr matInv, bool memAlloc) { virtual void inverse(MatrixPtr& matInv, bool memAlloc) {
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
} }
...@@ -1169,11 +1189,15 @@ public: ...@@ -1169,11 +1189,15 @@ public:
void accumulateColSum(Matrix& src); void accumulateColSum(Matrix& src);
real getAbsSum(); real getAbsSum();
real getMin();
real getMax();
MatrixPtr getTranspose(); MatrixPtr getTranspose();
void transpose(MatrixPtr matTrans, bool memAlloc); void transpose(MatrixPtr& matTrans, bool memAlloc);
void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
MatrixPtr getInverse(); MatrixPtr getInverse();
void inverse(MatrixPtr matInv, bool memAlloc); void inverse(MatrixPtr& matInv, bool memAlloc);
/// add b to each sample of this. /// add b to each sample of this.
void addBias(Matrix& b, real scale); void addBias(Matrix& b, real scale);
...@@ -1485,10 +1509,11 @@ public: ...@@ -1485,10 +1509,11 @@ public:
real getAbsSum(); real getAbsSum();
MatrixPtr getTranspose(); MatrixPtr getTranspose();
void transpose(MatrixPtr matTrans, bool memAlloc); void transpose(MatrixPtr& matTrans, bool memAlloc);
void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
MatrixPtr getInverse(); MatrixPtr getInverse();
void inverse(MatrixPtr matInv, bool memAlloc); void inverse(MatrixPtr& matInv, bool memAlloc);
void copyFrom(const Matrix& src); void copyFrom(const Matrix& src);
......
...@@ -497,7 +497,7 @@ void GpuSparseMatrix::setRow(size_t row, ...@@ -497,7 +497,7 @@ void GpuSparseMatrix::setRow(size_t row,
SparseValueType GpuSparseMatrix::getValueType() const { return valueType_; } SparseValueType GpuSparseMatrix::getValueType() const { return valueType_; }
void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { void GpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
CHECK_EQ(format_, SPARSE_CSC); CHECK_EQ(format_, SPARSE_CSC);
int nnz = sMatrix_->nnz; int nnz = sMatrix_->nnz;
if (memAlloc) { if (memAlloc) {
......
...@@ -109,7 +109,7 @@ public: ...@@ -109,7 +109,7 @@ public:
MatrixPtr getTranspose(); MatrixPtr getTranspose();
/// B = A' /// B = A'
void transpose(MatrixPtr matTrans, bool memAlloc); void transpose(MatrixPtr& matTrans, bool memAlloc);
void copyFrom(const Matrix& src); void copyFrom(const Matrix& src);
void copyFrom(const Matrix& src, hl_stream_t stream); void copyFrom(const Matrix& src, hl_stream_t stream);
......
...@@ -248,11 +248,13 @@ TEST(Matrix, SparseMatrixTranspose) { ...@@ -248,11 +248,13 @@ TEST(Matrix, SparseMatrixTranspose) {
/*dense matrix transpose*/ /*dense matrix transpose*/
CpuMatrixPtr matC(new CpuMatrix(height, width)); CpuMatrixPtr matC(new CpuMatrix(height, width));
matC->copyFrom(*matA); matC->copyFrom(*matA);
CpuMatrixPtr matD(new CpuMatrix(width, height)); MatrixPtr matD(new CpuMatrix(width, height));
matC->transpose(matD, false); matC->transpose(matD, false);
/*check result*/ /*check result*/
checkSMatrixEqual2Dense( checkSMatrixEqual2Dense(
std::dynamic_pointer_cast<CpuSparseMatrix>(matB), matD); std::dynamic_pointer_cast<CpuSparseMatrix>(matB),
std::dynamic_pointer_cast<CpuMatrix>(matD));
} }
} }
} }
......
...@@ -105,6 +105,21 @@ void testMatrixGetSum(int height, int width) { ...@@ -105,6 +105,21 @@ void testMatrixGetSum(int height, int width) {
EXPECT_LE(fabs(cpuSum - gpuSum), err); EXPECT_LE(fabs(cpuSum - gpuSum), err);
} }
void testMatrixGetMinMax(int height, int width) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
real cpuMin = cpuInput->getMin();
real gpuMin = gpuInput->getMin();
real cpuMax = cpuInput->getMax();
real gpuMax = gpuInput->getMax();
EXPECT_EQ(cpuMin, gpuMin);
EXPECT_EQ(cpuMax, gpuMax);
}
void testMatrixZeroAtOffset(int height, int width) { void testMatrixZeroAtOffset(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width); MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width); MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
...@@ -161,11 +176,29 @@ void testMatrixTranspose(int height, int width) { ...@@ -161,11 +176,29 @@ void testMatrixTranspose(int height, int width) {
cpu->randomizeUniform(); cpu->randomizeUniform();
gpu->copyFrom(*cpu); gpu->copyFrom(*cpu);
cpu->transpose(cpuT, false); cpu->transpose(cpuT, false);
gpu->transpose(gpuT, false); gpu->transpose(gpuT, true);
TensorCheckEqual(*cpuT, *gpuT); TensorCheckEqual(*cpuT, *gpuT);
} }
void testMatrixRotate(int height, int width) {
MatrixPtr cpu = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpu = std::make_shared<GpuMatrix>(height, width);
MatrixPtr cpuR = std::make_shared<CpuMatrix>(width, height);
MatrixPtr gpuR = std::make_shared<GpuMatrix>(width, height);
cpu->randomizeUniform();
gpu->copyFrom(*cpu);
cpu->rotate(cpuR, false, true);
gpu->rotate(gpuR, true, true);
TensorCheckEqual(*cpuR, *gpuR);
cpu->rotate(cpuR, true, false);
gpu->rotate(gpuR, false, false);
TensorCheckEqual(*cpuR, *gpuR);
}
void testMatrixInverse(int height) { void testMatrixInverse(int height) {
MatrixPtr cpu = std::make_shared<CpuMatrix>(height, height); MatrixPtr cpu = std::make_shared<CpuMatrix>(height, height);
MatrixPtr gpu = std::make_shared<GpuMatrix>(height, height); MatrixPtr gpu = std::make_shared<GpuMatrix>(height, height);
...@@ -181,7 +214,7 @@ void testMatrixInverse(int height) { ...@@ -181,7 +214,7 @@ void testMatrixInverse(int height) {
cpu->add(*outputCheck); cpu->add(*outputCheck);
gpu->copyFrom(*cpu); gpu->copyFrom(*cpu);
cpu->inverse(cpuI, false); cpu->inverse(cpuI, true);
gpu->inverse(gpuI, false); gpu->inverse(gpuI, false);
TensorCheckErr(*cpuI, *gpuI); TensorCheckErr(*cpuI, *gpuI);
...@@ -200,6 +233,7 @@ TEST(Matrix, unary) { ...@@ -200,6 +233,7 @@ TEST(Matrix, unary) {
testMatrixZeroAtOffset(height, width); testMatrixZeroAtOffset(height, width);
testMatrixGetSum(height, width); testMatrixGetSum(height, width);
testMatrixTranspose(height, width); testMatrixTranspose(height, width);
testMatrixRotate(height, width);
} }
// inverse // inverse
testMatrixInverse(height); testMatrixInverse(height);
......
...@@ -427,14 +427,14 @@ message LayerConfig { ...@@ -427,14 +427,14 @@ message LayerConfig {
// bias size // bias size
optional uint32 bias_size = 48 [default = 0]; optional uint32 bias_size = 48 [default = 0];
// this parameter can be used as a user-defined parameter when necessary, // this parameter can be used as a user-defined parameter when necessary,
// without changing the proto file. // without changing the proto file.
// e.g., when a new layer with a user-defined parameter is implemented, // e.g., when a new layer with a user-defined parameter is implemented,
// it can be used to pass that parameter, without modifying the proto file. // it can be used to pass that parameter, without modifying the proto file.
// string type is used for flexibility: different types can be converted // string type is used for flexibility: different types can be converted
// to string and reinterpreted in the user's own layer implementation. // to string and reinterpreted in the user's own layer implementation.
optional string user_arg = 49; optional string user_arg = 49;
// to indicate rectangle image data // to indicate rectangle image data
optional uint64 height = 50; optional uint64 height = 50;
optional uint64 width = 51; optional uint64 width = 51;
......
...@@ -830,7 +830,6 @@ class Pool(Cfg): ...@@ -830,7 +830,6 @@ class Pool(Cfg):
channels, channels,
size_x, size_x,
size_y=None, size_y=None,
img_width=None,
start=None, start=None,
stride=None, # 1 by defalut in protobuf stride=None, # 1 by defalut in protobuf
stride_y=None, stride_y=None,
...@@ -1927,8 +1926,8 @@ class BatchNormLayer(LayerBase): ...@@ -1927,8 +1926,8 @@ class BatchNormLayer(LayerBase):
image_conf = self.config.inputs[0].image_conf image_conf = self.config.inputs[0].image_conf
parse_image(self.inputs[0].image, input_layer.name, image_conf) parse_image(self.inputs[0].image, input_layer.name, image_conf)
# Only pass the width and height of input to batch_norm layer # Only pass the width and height of input to batch_norm layer
# when either of it is non-zero. # when either of it is non-zero.
if input_layer.width != 0 or input_layer.height != 0: if input_layer.width != 0 or input_layer.height != 0:
self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size, self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
image_conf.channels, False) image_conf.channels, False)
...@@ -1968,6 +1967,18 @@ class ResizeLayer(LayerBase): ...@@ -1968,6 +1967,18 @@ class ResizeLayer(LayerBase):
'ResizeLayer must have one and only one input') 'ResizeLayer must have one and only one input')
@config_layer('rotate')
class RotateLayer(LayerBase):
    # Config-side definition of the 'rotate' layer: takes exactly one input,
    # records the per-channel height/width, and keeps the input's size.
    def __init__(self, name, inputs, height, width, device=None):
        super(RotateLayer, self).__init__(
            name, 'rotate', 0, inputs=inputs, device=device)

        input_count = len(self.inputs)
        config_assert(input_count == 1,
                      'RotateLayer must have one and only one input')

        self.set_layer_height_width(height, width)

        # The layer is created with size 0 above; its real size is the size
        # of its single input layer.
        source_layer = self.get_input_layer(0)
        self.set_layer_size(source_layer.size)
@config_layer('blockexpand') @config_layer('blockexpand')
class BlockExpandLayer(LayerBase): class BlockExpandLayer(LayerBase):
def __init__(self, name, inputs, **xargs): def __init__(self, name, inputs, **xargs):
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
# recurrent_units.py # recurrent_units.py
# Version 2.0 # Version 2.0
# #
# Some recurrent units can be used in recurrent layer group, # Some recurrent units can be used in recurrent layer group,
# to use these units, import this module in your config_file: # to use these units, import this module in your config_file:
# import trainer.recurrent_units # import trainer.recurrent_units
# #
# The modules in this file are DEPRECATED. # The modules in this file are DEPRECATED.
# If you would like to use lstm/gru # If you would like to use lstm/gru
# please use the functions defined in paddle.trainer_config_helpers. # please use the functions defined in paddle.trainer_config_helpers.
...@@ -29,7 +29,7 @@ from paddle.trainer.config_parser import * ...@@ -29,7 +29,7 @@ from paddle.trainer.config_parser import *
# long short term memory, can be used in recurrent machine # long short term memory, can be used in recurrent machine
# *inputs* must be a list of Projections, for example: # *inputs* must be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")], # inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names, if the *para_prefix* of # *para_prefix* defines parameter names, if the *para_prefix* of
# two LstmRecurrentUnit is same, they share same parameters # two LstmRecurrentUnit is same, they share same parameters
# *out_memory* can be defined outside if it's used outside # *out_memory* can be defined outside if it's used outside
def LstmRecurrentUnit(name, def LstmRecurrentUnit(name,
...@@ -197,7 +197,7 @@ def LstmRecurrentLayerGroup(name, ...@@ -197,7 +197,7 @@ def LstmRecurrentLayerGroup(name,
# gated recurrent unit, can be used in recurrent machine # gated recurrent unit, can be used in recurrent machine
# *inputs* should be a list of Projections, for example: # *inputs* should be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")], # inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names, if the *para_prefix* of # *para_prefix* defines parameter names, if the *para_prefix* of
# two GatedRecurrentUnit is same, they share same parameters # two GatedRecurrentUnit is same, they share same parameters
# *out_memory* can be defined outside if it's used outside # *out_memory* can be defined outside if it's used outside
......
...@@ -70,6 +70,7 @@ __all__ = [ ...@@ -70,6 +70,7 @@ __all__ = [
'interpolation_layer', 'interpolation_layer',
'bilinear_interp_layer', 'bilinear_interp_layer',
'trans_layer', 'trans_layer',
'rotate_layer',
'sum_to_one_norm_layer', 'sum_to_one_norm_layer',
'get_output_layer', 'get_output_layer',
'LayerType', 'LayerType',
...@@ -154,6 +155,7 @@ class LayerType(object): ...@@ -154,6 +155,7 @@ class LayerType(object):
POWER_LAYER = 'power' POWER_LAYER = 'power'
SCALING_LAYER = 'scaling' SCALING_LAYER = 'scaling'
TRANS_LAYER = 'trans' TRANS_LAYER = 'trans'
ROTATE_LAYER = 'rotate'
OUT_PROD_LAYER = 'out_prod' OUT_PROD_LAYER = 'out_prod'
FEATURE_MAP_EXPAND_LAYER = 'featmap_expand' FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'
...@@ -1642,7 +1644,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None): ...@@ -1642,7 +1644,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
@layer_support() @layer_support()
def trans_layer(input, name=None, layer_attr=None): def trans_layer(input, name=None, layer_attr=None):
""" """
A layer for transposition. A layer for transposing a minibatch matrix.
.. math:: .. math::
y = x^\mathrm{T} y = x^\mathrm{T}
...@@ -1673,6 +1675,52 @@ def trans_layer(input, name=None, layer_attr=None): ...@@ -1673,6 +1675,52 @@ def trans_layer(input, name=None, layer_attr=None):
name, LayerType.TRANS_LAYER, parents=[input], size=input.size) name, LayerType.TRANS_LAYER, parents=[input], size=input.size)
@wrap_name_default()
@layer_support()
def rotate_layer(input, height, width, name=None, layer_attr=None):
    """
    A layer for rotating 90 degrees (clock-wise) for each feature channel,
    usually used when the input sample is some image or feature map.

    .. math::
       y(j,i,:) = x(M-i-1,j,:)

    where :math:`x` is (M x N x C) input, and :math:`y` is (N x M x C) output.
    Here M is ``height`` and N is ``width``; the size of the input layer must
    be a multiple of ``height * width`` (the quotient is the channel number C).

    The example usage is:

    .. code-block:: python

       rot = rotate_layer(input=layer,
                          height=100,
                          width=100)

    :param input: Input layer.
    :type input: LayerOutput
    :param height: The height of the sample matrix
    :type height: int
    :param width: The width of the sample matrix
    :type width: int
    :param name: Layer name.
    :type name: basestring
    :param layer_attr: extra layer attributes.
    :type layer_attr: ExtraLayerAttribute.
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    assert isinstance(input, LayerOutput)
    l = Layer(
        name=name,
        height=height,
        width=width,
        type=LayerType.ROTATE_LAYER,
        inputs=[input.name],
        **ExtraLayerAttribute.to_kwargs(layer_attr))
    return LayerOutput(
        name=name,
        layer_type=LayerType.ROTATE_LAYER,
        parents=[input],
        size=l.config.size)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
...@@ -1826,14 +1874,14 @@ def img_conv_layer(input, ...@@ -1826,14 +1874,14 @@ def img_conv_layer(input,
trans=False, trans=False,
layer_type=None): layer_type=None):
""" """
Convolution layer for image. Paddle can support both square and non-square Convolution layer for image. Paddle can support both square and non-square
input currently. input currently.
The details of convolution layer, please refer UFLDL's `convolution The details of convolution layer, please refer UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/ <http://ufldl.stanford.edu/tutorial/supervised/
FeatureExtractionUsingConvolution/>`_ . FeatureExtractionUsingConvolution/>`_ .
Convolution Transpose (deconv) layer for image. Paddle can support both square Convolution Transpose (deconv) layer for image. Paddle can support both square
and non-square input currently. and non-square input currently.
The details of convolution transpose layer, The details of convolution transpose layer,
...@@ -1892,7 +1940,7 @@ def img_conv_layer(input, ...@@ -1892,7 +1940,7 @@ def img_conv_layer(input,
:param trans: true if it is a convTransLayer, false if it is a convLayer :param trans: true if it is a convTransLayer, false if it is a convLayer
:type trans: bool :type trans: bool
:param layer_type: specify the layer_type, default is None. If trans=True, :param layer_type: specify the layer_type, default is None. If trans=True,
layer_type has to be "exconvt", otherwise layer_type layer_type has to be "exconvt", otherwise layer_type
has to be either "exconv" or "cudnn_conv" has to be either "exconv" or "cudnn_conv"
:type layer_type: String :type layer_type: String
:return: LayerOutput object. :return: LayerOutput object.
...@@ -3626,9 +3674,9 @@ def pad_layer(input, ...@@ -3626,9 +3674,9 @@ def pad_layer(input,
input data and 3 zeros after the input data in channel dimension. input data and 3 zeros after the input data in channel dimension.
pad_h means padding zeros in height dimension. pad_w means padding zeros pad_h means padding zeros in height dimension. pad_w means padding zeros
in width dimension. in width dimension.
For example, For example,
.. code-block:: .. code-block::
input(2,2,2,3) = [ input(2,2,2,3) = [
...@@ -3637,7 +3685,7 @@ def pad_layer(input, ...@@ -3637,7 +3685,7 @@ def pad_layer(input,
[ [[4,3,1], [1,8,7]], [ [[4,3,1], [1,8,7]],
[[3,8,9], [2,3,5]] ] [[3,8,9], [2,3,5]] ]
] ]
pad_c=[1,1], pad_h=[0,0], pad_w=[0,0] pad_c=[1,1], pad_h=[0,0], pad_w=[0,0]
output(2,4,2,3) = [ output(2,4,2,3) = [
[ [[0,0,0], [0,0,0]], [ [[0,0,0], [0,0,0]],
...@@ -4746,6 +4794,7 @@ def cross_entropy_with_selfnorm(input, ...@@ -4746,6 +4794,7 @@ def cross_entropy_with_selfnorm(input,
layer_attr=None): layer_attr=None):
""" """
A loss layer for multi class entropy with selfnorm. A loss layer for multi class entropy with selfnorm.
Input should be a vector of positive numbers, without normalization.
.. code-block:: python .. code-block:: python
......
...@@ -957,22 +957,22 @@ def simple_gru(input, ...@@ -957,22 +957,22 @@ def simple_gru(input,
use one complete layer to implement rnn (including simple rnn, gru and lstm) use one complete layer to implement rnn (including simple rnn, gru and lstm)
with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But, with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But,
the multiplication operation :math:`W x_t` is not computed in these layers. the multiplication operation :math:`W x_t` is not computed in these layers.
See details in their interfaces in layers.py. See details in their interfaces in layers.py.
The other implementation is to use an recurrent group which can ensemble a The other implementation is to use an recurrent group which can ensemble a
series of layers to compute rnn step by step. This way is flexible for series of layers to compute rnn step by step. This way is flexible for
attention mechanism or other complex connections. attention mechanism or other complex connections.
- gru_step_layer: only compute rnn by one step. It needs an memory as input - gru_step_layer: only compute rnn by one step. It needs an memory as input
and can be used in recurrent group. and can be used in recurrent group.
- gru_unit: a wrapper of gru_step_layer with memory. - gru_unit: a wrapper of gru_step_layer with memory.
- gru_group: a GRU cell implemented by a combination of multiple layers in - gru_group: a GRU cell implemented by a combination of multiple layers in
recurrent group. recurrent group.
But :math:`W x_t` is not done in group. But :math:`W x_t` is not done in group.
- gru_memory: a GRU cell implemented by one layer, which does same calculation - gru_memory: a GRU cell implemented by one layer, which does same calculation
with gru_group and is faster than gru_group. with gru_group and is faster than gru_group.
- simple_gru: a complete GRU implementation including :math:`W x_t` and - simple_gru: a complete GRU implementation including :math:`W x_t` and
gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`, see gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`, see
formula in grumemory. formula in grumemory.
The computational speed is that, grumemory is relatively better than The computational speed is that, grumemory is relatively better than
gru_group, and gru_group is relatively better than simple_gru. gru_group, and gru_group is relatively better than simple_gru.
......
...@@ -39,6 +39,7 @@ z1 = mixed_layer( ...@@ -39,6 +39,7 @@ z1 = mixed_layer(
assert z1.size > 0 assert z1.size > 0
y2 = fc_layer(input=y, size=15) y2 = fc_layer(input=y, size=15)
z2 = rotate_layer(input=y2, height=5, width=3)
cos1 = cos_sim(a=x1, b=y1) cos1 = cos_sim(a=x1, b=y1)
cos3 = cos_sim(a=x1, b=y2, size=3) cos3 = cos_sim(a=x1, b=y2, size=3)
...@@ -46,7 +47,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3) ...@@ -46,7 +47,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3) linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
out = fc_layer( out = fc_layer(
input=[cos1, cos3, linear_comb, z, z1], input=[cos1, cos3, linear_comb, z, z1, z2],
size=num_classes, size=num_classes,
act=SoftmaxActivation()) act=SoftmaxActivation())
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册