diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h
index abd5eb3a0cf338c689680dd0f7192be7b2530383..40828dd5cc76f4197e6cfbb1121f2eef2c1ac580 100644
--- a/paddle/cuda/include/hl_matrix.h
+++ b/paddle/cuda/include/hl_matrix.h
@@ -267,4 +267,16 @@ extern void hl_matrix_collect_shared_bias(real* B_d,
                                           const int dimN,
                                           real scale);
 
+/**
+ * @brief Rotate a matrix by 90 degrees.
+ *
+ * @param[in]  mat       input matrix (M x N).
+ * @param[out] matRot    output matrix (N x M).
+ * @param[in]  dimM      input matrix height.
+ * @param[in]  dimN      input matrix width.
+ * @param[in]  clockWise true for clockwise rotation, false for anti-clockwise.
+ */
+extern void hl_matrix_rotate(
+    real* mat, real* matRot, int dimM, int dimN, bool clockWise);
+
 #endif /* HL_MATRIX_H_ */
diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h
index 0b669f6735cb9771fd63ed8e3b45602db0db447c..a1712d1e4d2a5dc80526b7d7b5ad7bd4f5d8c1ed 100644
--- a/paddle/cuda/include/stub/hl_matrix_stub.h
+++ b/paddle/cuda/include/stub/hl_matrix_stub.h
@@ -106,4 +106,8 @@ inline void hl_matrix_collect_shared_bias(real* B_d,
                                           const int dimM,
                                           const int dimN,
                                           real scale) {}
+
+inline void hl_matrix_rotate(
+    real* mat, real* matRot, int dimM, int dimN, bool clockWise) {}
+
 #endif  // HL_MATRIX_STUB_H_
diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu
index 2b4c6f7c39cff78c0e76cc1dfd41e1c7ef334f11..cd23bd31057c5c8cd10173bc5fa5fa67f2d0e422 100644
--- a/paddle/cuda/src/hl_cuda_matrix.cu
+++ b/paddle/cuda/src/hl_cuda_matrix.cu
@@ -840,3 +840,28 @@ void hl_matrix_collect_shared_bias(real* B_d,
       (B_d, A_d, channel, dimM, dimN, dim, limit, scale);
   CHECK_SYNC("hl_matrix_collect_shared_bias failed");
 }
+
+__global__ void keMatrixRotate(real* mat, real* matRot,
+                               int dimM, int dimN, bool clockWise) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx < dimM * dimN) {
+    int i = idx / dimN;
+    int j = idx % dimN;
+    if (clockWise) {
+      matRot[j * dimM + i] = mat[(dimM - i - 1) * dimN + j];
+    } else {
+      matRot[j * dimM + i] = mat[i * dimN + (dimN - j - 1)];
+    }
+  }
+}
+
+void hl_matrix_rotate(real* mat, real* matRot,
+                      int dimM, int dimN, bool clockWise) {
+  CHECK_NOTNULL(mat);
+  CHECK_NOTNULL(matRot);
+  const int threads = 512;
+  const int blocks = DIVUP(dimM * dimN, threads);
+  keMatrixRotate<<<blocks, threads, 0, STREAM_DEFAULT>>>
+      (mat, matRot, dimM, dimN, clockWise);
+  CHECK_SYNC("hl_matrix_rotate failed");
+}
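To make the kernel's index arithmetic concrete, here is a small host-side check of the `clockWise` branch. This is a standalone sketch with made-up values, not part of the patch:

```cpp
#include <cassert>

int main() {
  // 2 x 3 row-major input: [[1, 2, 3], [4, 5, 6]]
  const int dimM = 2;
  const int dimN = 3;
  float mat[dimM * dimN] = {1, 2, 3, 4, 5, 6};
  float matRot[dimN * dimM];

  // Same mapping as the clockWise branch of keMatrixRotate, run serially:
  // output (j, i) <- input (dimM - i - 1, j).
  for (int i = 0; i < dimM; ++i) {
    for (int j = 0; j < dimN; ++j) {
      matRot[j * dimM + i] = mat[(dimM - i - 1) * dimN + j];
    }
  }

  // Expected 3 x 2 result: [[4, 1], [5, 2], [6, 3]] -- the last input row
  // becomes the first output column, as a clockwise rotation should do.
  const float expected[dimN * dimM] = {4, 1, 5, 2, 6, 3};
  for (int k = 0; k < dimN * dimM; ++k) {
    assert(matRot[k] == expected[k]);
  }
  return 0;
}
```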
diff --git a/paddle/gserver/layers/FeatureMapExpandLayer.cpp b/paddle/gserver/layers/FeatureMapExpandLayer.cpp
index d023074c52167554358d0d4df7ec40cfba9da2a6..4b685812aac24782e4fe47fa85c7e91bd3494087 100644
--- a/paddle/gserver/layers/FeatureMapExpandLayer.cpp
+++ b/paddle/gserver/layers/FeatureMapExpandLayer.cpp
@@ -95,6 +95,9 @@ void FeatureMapExpandLayer::forward(PassType passType) {
 
 void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inGrad = getInputGrad(0);
+  if (NULL == inGrad) {
+    return;
+  }
   MatrixPtr outGrad = getOutputGrad();
   size_t batchSize = getInput(0).getBatchSize();
   int imgSize = inGrad->getWidth();
diff --git a/paddle/gserver/layers/RotateLayer.cpp b/paddle/gserver/layers/RotateLayer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7c71088d786ab218bf0f71b577985c023dd1436f
--- /dev/null
+++ b/paddle/gserver/layers/RotateLayer.cpp
@@ -0,0 +1,102 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "RotateLayer.h"
+
+namespace paddle {
+
+REGISTER_LAYER(rotate, RotateLayer);
+
+bool RotateLayer::init(const LayerMap& layerMap,
+                       const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+
+  CHECK_EQ(inputLayers_.size(), 1UL);
+  height_ = config_.height();
+  width_ = config_.width();
+  CHECK_GT(height_, 0);
+  CHECK_GT(width_, 0);
+  return true;
+}
+
+void RotateLayer::forward(PassType passType) {
+  Layer::forward(passType);
+
+  MatrixPtr input = getInputValue(0);
+  batchSize_ = input->getHeight();
+  size_ = input->getWidth();
+  CHECK_GE(size_, height_ * width_);
+  CHECK_EQ(size_ % (height_ * width_), 0)
+      << "total size_ is not divisible by (height_ * width_), i.e., "
+      << "the channel number should be an integer";
+  channels_ = size_ / (height_ * width_);
+
+  resizeOutput(batchSize_, size_);
+
+  MatrixPtr outV = getOutputValue();
+  for (int b = 0; b < batchSize_; b++) {   // for each input feature map
+    for (int c = 0; c < channels_; c++) {  // for each feature channel
+      MatrixPtr inputSample =
+          Matrix::create(input->getData() + b * size_ + c * height_ * width_,
+                         height_,
+                         width_,
+                         false,
+                         useGpu_);
+      MatrixPtr outputSample =
+          Matrix::create(outV->getData() + b * size_ + c * height_ * width_,
+                         width_,
+                         height_,
+                         false,
+                         useGpu_);
+      inputSample->rotate(outputSample, false, true /* clockwise */);
+    }
+  }
+
+  if (getInputGrad(0)) {
+    zeroGrad();
+  }
+}
+
+void RotateLayer::backward(const UpdateCallback& callback) {
+  (void)callback;
+
+  MatrixPtr outputGrad = getOutputGrad();
+  if (outputGrad == NULL) {
+    return;
+  }
+  // the gradient should be rotated in the reverse direction
+  MatrixPtr preGrad = getInputGrad(0);
+
+  for (int b = 0; b < batchSize_; b++) {   // for each input feature map
+    for (int c = 0; c < channels_; c++) {  // for each feature channel
+      MatrixPtr inputSampleGrad =
+          Matrix::create(preGrad->getData() + b * size_ + c * height_ * width_,
+                         height_,
+                         width_,
+                         false,
+                         useGpu_);
+      MatrixPtr outputSampleGrad = Matrix::create(
+          outputGrad->getData() + b * size_ + c * height_ * width_,
+          width_,
+          height_,
+          false,
+          useGpu_);
+      MatrixPtr tmpGrad = nullptr;
+      outputSampleGrad->rotate(tmpGrad, true, false /* anti-clockwise */);
+      inputSampleGrad->add(*tmpGrad);
+    }
+  }
+}
+
+}  // namespace paddle
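As a sanity check on the backward pass above (this derivation is mine, not part of the patch): per channel, the forward pass computes a clockwise rotation, each input entry appears exactly once in the output, and the chain rule makes the input gradient the anti-clockwise rotation of the output gradient:

```latex
y(j,i) = x(M-i-1,\,j)
\;\Longrightarrow\;
\frac{\partial L}{\partial x}(i,j)
  = \frac{\partial L}{\partial y}(j,\,M-i-1)
  = \mathrm{rot}_{\mathrm{ccw}}\!\left(\frac{\partial L}{\partial y}\right)(i,j),
\qquad
\mathrm{rot}_{\mathrm{ccw}}(g)(j,i) = g(i,\,M-1-j)
```

for \(g\) with \(N\) rows and \(M\) columns. That is exactly what `outputSampleGrad->rotate(tmpGrad, true, false)` computes before `add(*tmpGrad)` accumulates it into the input gradient.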
diff --git a/paddle/gserver/layers/RotateLayer.h b/paddle/gserver/layers/RotateLayer.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a64d4d5a51d9c04df07861f02f1bb91eaec088e
--- /dev/null
+++ b/paddle/gserver/layers/RotateLayer.h
@@ -0,0 +1,51 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+
+namespace paddle {
+/**
+ * A layer for rotating a multi-channel feature map (M x N x C) in the
+ * spatial domain. The rotation is 90 degrees clockwise for each channel:
+ * \f[
+ *   y(j,i,:) = x(M-i-1,j,:)
+ * \f]
+ * where \f$x\f$ is the (M x N x C) input and \f$y\f$ is the (N x M x C)
+ * output.
+ *
+ * The config file api is rotate_layer.
+ */
+
+class RotateLayer : public Layer {
+public:
+  explicit RotateLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+  void forward(PassType passType);
+  void backward(const UpdateCallback& callback = nullptr);
+
+private:
+  int batchSize_;
+  int size_;
+  int height_;
+  int width_;
+  int channels_;
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/TransLayer.h b/paddle/gserver/layers/TransLayer.h
index b43fa1ebfb003226daed724b4ede3006545e8b07..1d5a370b3414bf96076532858ff822080f40a2f5 100644
--- a/paddle/gserver/layers/TransLayer.h
+++ b/paddle/gserver/layers/TransLayer.h
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 /**
- * A layer for transposition.
+ * A layer for transposing a minibatch matrix.
  * \f[
  *   y = x^\mathrm{T}
  * \f]
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 8c8e876bd64fb97e11bc04c26ec45358f3f808a1..14d9db52470b2828186eca04d303135910489266 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1316,6 +1316,25 @@ TEST(Layer, ResizeLayer) {
   }
 }
 
+TEST(Layer, RotateLayer) {
+  TestConfig config;
+  config.biasSize = 0;
+  config.layerConfig.set_type("rotate");
+  const int CHANNEL = 2;
+  const int HEIGHT = 8;
+  const int WIDTH = 4;
+  const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL;
+  config.layerConfig.set_size(INPUT_SIZE);
+  config.layerConfig.set_height(HEIGHT);
+  config.layerConfig.set_width(WIDTH);
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "rotate", 100, false, useGpu);
+  }
+}
+
 TEST(Layer, NCELayer) {
   TestConfig config;
   size_t numClasses = 4;
diff --git a/paddle/math/CpuSparseMatrix.cpp b/paddle/math/CpuSparseMatrix.cpp
index 82a482f701481267e564c7ad8179689deb65a75b..bf62229c03bb1d6e2bdf86d8c56a8157938fb832 100644
--- a/paddle/math/CpuSparseMatrix.cpp
+++ b/paddle/math/CpuSparseMatrix.cpp
@@ -372,7 +372,7 @@ MatrixPtr CpuSparseMatrix::subMatrix(size_t startRow, size_t numRows) {
 }
 
 /* mem MUST be allocated outside (memAlloc=false) */
-void CpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void CpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   CHECK(!memAlloc);
   CpuSparseMatrix* mat = dynamic_cast<CpuSparseMatrix*>(matTrans.get());
   if (format_ == SPARSE_CSR) {
diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h
index d3e8871cb5b320ce420d601bde7f18d85398dde7..860cad1047fc343b13efa901186ea218d0855151 100644
--- a/paddle/math/CpuSparseMatrix.h
+++ b/paddle/math/CpuSparseMatrix.h
@@ -201,7 +201,7 @@
 public:
   void zeroMem();
 
   /// mem MUST be allocated outside (memAlloc=false)
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
 
   void mul(const Matrix& A, const Matrix& B, real alpha, real beta);
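The `MatrixPtr` to `MatrixPtr&` changes here (and in `Matrix.h`/`Matrix.cpp` below) are what make the `memAlloc = true` path usable: `MatrixPtr` is a `std::shared_ptr`, so assigning a freshly allocated matrix to a by-value parameter only updates a local copy. A minimal sketch with a stand-in `Matrix` type (not Paddle's class):

```cpp
#include <cassert>
#include <memory>

struct Matrix {};  // stand-in for paddle::Matrix
using MatrixPtr = std::shared_ptr<Matrix>;

// By value: the new allocation lands in a local copy and is dropped.
void allocByValue(MatrixPtr out) { out = std::make_shared<Matrix>(); }

// By reference: the caller's pointer now owns the new allocation.
void allocByRef(MatrixPtr& out) { out = std::make_shared<Matrix>(); }

int main() {
  MatrixPtr a;
  MatrixPtr b;
  allocByValue(a);
  allocByRef(b);
  assert(!a);  // unchanged: still null
  assert(b);   // points at the new Matrix
  return 0;
}
```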
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 3ae237bc7de895293c15eedc811cf8a2011a7c52..a8b53e2105b053399e62fba5321fd22c1fe4a50d 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -274,6 +274,18 @@ real GpuMatrix::getSum() {
   return sum;
 }
 
+real GpuMatrix::getMin() {
+  CHECK(isContiguous());
+  auto vec = GpuVector(height_ * width_, data_);
+  return vec.getMin();
+}
+
+real GpuMatrix::getMax() {
+  CHECK(isContiguous());
+  auto vec = GpuVector(height_ * width_, data_);
+  return vec.getMax();
+}
+
 void GpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
@@ -371,11 +383,13 @@ MatrixPtr GpuMatrix::getTranspose() {
   }
 }
 
-void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   if (memAlloc) {
     matTrans = std::make_shared<GpuMatrix>(width_, height_);
   } else {
     CHECK(matTrans != NULL);
+    CHECK_EQ(matTrans->getHeight(), width_);
+    CHECK_EQ(matTrans->getWidth(), height_);
   }
   real* dataTrans = matTrans->getData();
   real* data = getData();
@@ -385,13 +399,27 @@ void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
   hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc);
 }
 
+void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
+  if (memAlloc) {
+    matRot = std::make_shared<GpuMatrix>(width_, height_);
+  } else {
+    CHECK(matRot != NULL);
+    CHECK_EQ(matRot->getHeight(), width_);
+    CHECK_EQ(matRot->getWidth(), height_);
+  }
+
+  real* dataRot = matRot->getData();
+  real* data = getData();
+  hl_matrix_rotate(data, dataRot, height_, width_, clockWise);
+}
+
 MatrixPtr GpuMatrix::getInverse() {
   MatrixPtr matInv;
   inverse(matInv, true);
   return matInv;
 }
 
-void GpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) {
+void GpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
   CHECK_EQ(height_, width_);
 
   if (memAlloc) {
@@ -1690,11 +1718,13 @@ MatrixPtr CpuMatrix::getTranspose() {
   }
 }
 
-void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   if (memAlloc) {
     matTrans = std::make_shared<CpuMatrix>(width_, height_);
   } else {
     CHECK(matTrans != NULL);
+    CHECK_EQ(matTrans->getHeight(), width_);
+    CHECK_EQ(matTrans->getWidth(), height_);
   }
   real* dataTrans = matTrans->getData();
   real* data = getData();
@@ -1708,13 +1738,35 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
   }
 }
 
+void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
+  if (memAlloc) {
+    matRot = std::make_shared<CpuMatrix>(width_, height_);
+  } else {
+    CHECK(matRot != NULL);
+    CHECK_EQ(matRot->getHeight(), width_);
+    CHECK_EQ(matRot->getWidth(), height_);
+  }
+  real* dataRot = matRot->getData();
+  real* data = getData();
+
+  for (size_t i = 0; i < height_; i++) {
+    for (size_t j = 0; j < width_; j++) {
+      if (clockWise) {
+        dataRot[j * height_ + i] = data[(height_ - i - 1) * width_ + j];
+      } else {
+        dataRot[j * height_ + i] = data[i * width_ + (width_ - j - 1)];
+      }
+    }
+  }
+}
+
 MatrixPtr CpuMatrix::getInverse() {
   MatrixPtr matInv;
   inverse(matInv, true);
   return matInv;
 }
 
-void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) {
+void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
   CHECK_EQ(height_, width_);
 
   if (memAlloc) {
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 57c0c2fe40a95d75ca580196e6f9ae36ce6edcdc..c92c0a272d5a72868bd61035d77aa4ed0fad7a7c 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -372,7 +372,27 @@ public:
    * allocate matTrans' memory outside, then set memAlloc as false;
    * else set as true.
    */
-  virtual void transpose(MatrixPtr matTrans, bool memAlloc) {
+  virtual void transpose(MatrixPtr& matTrans, bool memAlloc) {
+    LOG(FATAL) << "Not implemented";
+  }
+
+  /**
+   * @brief Rotate the matrix by 90 degrees, clockwise if clockWise=true,
+   *        anti-clockwise otherwise.
+   * clockwise:
+   * \f[
+   *   y(j,i) = x(M-i-1,j)
+   * \f]
+   * anti-clockwise:
+   * \f[
+   *   y(j,i) = x(i,N-1-j)
+   * \f]
+   * where \f$x\f$ is the (M x N) input and \f$y\f$ is the (N x M) output.
+   *
+   * If matRot's memory is allocated outside, set memAlloc to false;
+   * otherwise set it to true.
+   */
+  virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
     LOG(FATAL) << "Not implemented";
   }
@@ -387,7 +407,7 @@ public:
    * if allocate matInv's memory outside, then set memAlloc as false;
    * else set as true.
    */
-  virtual void inverse(MatrixPtr matInv, bool memAlloc) {
+  virtual void inverse(MatrixPtr& matInv, bool memAlloc) {
     LOG(FATAL) << "Not implemented";
   }
@@ -1169,11 +1189,15 @@ public:
   void accumulateColSum(Matrix& src);
   real getAbsSum();
 
+  real getMin();
+  real getMax();
+
   MatrixPtr getTranspose();
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
+  void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
 
   MatrixPtr getInverse();
-  void inverse(MatrixPtr matInv, bool memAlloc);
+  void inverse(MatrixPtr& matInv, bool memAlloc);
 
   /// add b to each sample of this.
   void addBias(Matrix& b, real scale);
@@ -1485,10 +1509,11 @@ public:
   real getAbsSum();
 
   MatrixPtr getTranspose();
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
+  void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
 
   MatrixPtr getInverse();
-  void inverse(MatrixPtr matInv, bool memAlloc);
+  void inverse(MatrixPtr& matInv, bool memAlloc);
 
   void copyFrom(const Matrix& src);
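A hedged usage sketch of the two allocation modes described by the comment above, using the patch's own API (assumes a Paddle build environment; the `CpuMatrix` constructor and `randomizeUniform()` are the ones used by the tests further down):

```cpp
#include <memory>

#include "paddle/math/Matrix.h"

using paddle::CpuMatrix;
using paddle::MatrixPtr;

int main() {
  MatrixPtr in = std::make_shared<CpuMatrix>(4, 6);  // 4 x 6 input
  in->randomizeUniform();

  // memAlloc = true: rotate() allocates the 6 x 4 result itself and hands
  // it back through the reference parameter.
  MatrixPtr rot;
  in->rotate(rot, true, true /* clockwise */);

  // memAlloc = false: the caller allocates, and rotate() CHECKs that the
  // shape matches the transposed dimensions.
  MatrixPtr pre = std::make_shared<CpuMatrix>(6, 4);
  in->rotate(pre, false, false /* anti-clockwise */);
  return 0;
}
```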
diff --git a/paddle/math/SparseMatrix.cpp b/paddle/math/SparseMatrix.cpp
index 3bae6d373f240fcc773644386b290ef9874828ae..6370c77386688a334fa0de8b4e2b272882e9e2b0 100644
--- a/paddle/math/SparseMatrix.cpp
+++ b/paddle/math/SparseMatrix.cpp
@@ -497,7 +497,7 @@ void GpuSparseMatrix::setRow(size_t row,
 
 SparseValueType GpuSparseMatrix::getValueType() const { return valueType_; }
 
-void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
+void GpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
   CHECK_EQ(format_, SPARSE_CSC);
   int nnz = sMatrix_->nnz;
   if (memAlloc) {
diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h
index 1d3801548e03a6ae679afb15bf7f620172d61c57..f6cd5df338965b55ca17636de097d2401dc057f9 100644
--- a/paddle/math/SparseMatrix.h
+++ b/paddle/math/SparseMatrix.h
@@ -109,7 +109,7 @@ public:
   MatrixPtr getTranspose();
 
   /// B = A'
-  void transpose(MatrixPtr matTrans, bool memAlloc);
+  void transpose(MatrixPtr& matTrans, bool memAlloc);
 
   void copyFrom(const Matrix& src);
   void copyFrom(const Matrix& src, hl_stream_t stream);
diff --git a/paddle/math/tests/test_SparseMatrix.cpp b/paddle/math/tests/test_SparseMatrix.cpp
index 9d3fbaef43d719d07577631d5df3ac4656610cc6..c0572dfdbf738a4dfad04811b3a3e1b65487ff6d 100644
--- a/paddle/math/tests/test_SparseMatrix.cpp
+++ b/paddle/math/tests/test_SparseMatrix.cpp
@@ -248,11 +248,13 @@ TEST(Matrix, SparseMatrixTranspose) {
       /*dense matrix transpose*/
      CpuMatrixPtr matC(new CpuMatrix(height, width));
       matC->copyFrom(*matA);
-      CpuMatrixPtr matD(new CpuMatrix(width, height));
+      MatrixPtr matD(new CpuMatrix(width, height));
       matC->transpose(matD, false);
+
       /*check result*/
       checkSMatrixEqual2Dense(
-          std::dynamic_pointer_cast<CpuSparseMatrix>(matB), matD);
+          std::dynamic_pointer_cast<CpuSparseMatrix>(matB),
+          std::dynamic_pointer_cast<CpuMatrix>(matD));
     }
   }
 }
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index f0c49791d7e2a67220eafca3e1347f30958877a7..e024f2cf1b913f56301ac7b3380f0c382818f413 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -105,6 +105,21 @@ void testMatrixGetSum(int height, int width) {
   EXPECT_LE(fabs(cpuSum - gpuSum), err);
 }
 
+void testMatrixGetMinMax(int height, int width) {
+  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
+  cpuInput->randomizeUniform();
+  gpuInput->copyFrom(*cpuInput);
+
+  real cpuMin = cpuInput->getMin();
+  real gpuMin = gpuInput->getMin();
+  real cpuMax = cpuInput->getMax();
+  real gpuMax = gpuInput->getMax();
+
+  EXPECT_EQ(cpuMin, gpuMin);
+  EXPECT_EQ(cpuMax, gpuMax);
+}
+
 void testMatrixZeroAtOffset(int height, int width) {
   MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
   MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
@@ -161,11 +176,29 @@ void testMatrixTranspose(int height, int width) {
   cpu->randomizeUniform();
   gpu->copyFrom(*cpu);
   cpu->transpose(cpuT, false);
-  gpu->transpose(gpuT, false);
+  gpu->transpose(gpuT, true);
 
   TensorCheckEqual(*cpuT, *gpuT);
 }
 
+void testMatrixRotate(int height, int width) {
+  MatrixPtr cpu = std::make_shared<CpuMatrix>(height, width);
+  MatrixPtr gpu = std::make_shared<GpuMatrix>(height, width);
+  MatrixPtr cpuR = std::make_shared<CpuMatrix>(width, height);
+  MatrixPtr gpuR = std::make_shared<GpuMatrix>(width, height);
+
+  cpu->randomizeUniform();
+  gpu->copyFrom(*cpu);
+
+  cpu->rotate(cpuR, false, true);
+  gpu->rotate(gpuR, true, true);
+  TensorCheckEqual(*cpuR, *gpuR);
+
+  cpu->rotate(cpuR, true, false);
+  gpu->rotate(gpuR, false, false);
+  TensorCheckEqual(*cpuR, *gpuR);
+}
+
 void testMatrixInverse(int height) {
   MatrixPtr cpu = std::make_shared<CpuMatrix>(height, height);
   MatrixPtr gpu = std::make_shared<GpuMatrix>(height, height);
@@ -181,7 +214,7 @@ void testMatrixInverse(int height) {
   cpu->add(*outputCheck);
   gpu->copyFrom(*cpu);
 
-  cpu->inverse(cpuI, false);
+  cpu->inverse(cpuI, true);
   gpu->inverse(gpuI, false);
 
   TensorCheckErr(*cpuI, *gpuI);
@@ -200,6 +233,7 @@ TEST(Matrix, unary) {
       testMatrixZeroAtOffset(height, width);
       testMatrixGetSum(height, width);
       testMatrixTranspose(height, width);
+      testMatrixRotate(height, width);
     }
     // inverse
     testMatrixInverse(height);
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 0456404832c301d8ceb9338d32da0cea9eae5234..be4634d5103c0f219389823d132b1977963017e1 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -427,14 +427,14 @@ message LayerConfig {
   // bias size
   optional uint32 bias_size = 48 [default = 0];
 
-  // this parameter can be used as a user-defined parameter when necessary, 
+  // this parameter can be used as a user-defined parameter when necessary,
   // without changing the proto file.
-  // e.g., when a new layer with a user-defined parameter is implemented, 
+  // e.g., when a new layer with a user-defined parameter is implemented,
   // it can be used to pass that parameter, without modifying the proto file.
   // string type is used for flexibility: different types can be converted
-  // to string and reinterpreted in the user's own layer implementation. 
+  // to string and reinterpreted in the user's own layer implementation.
   optional string user_arg = 49;
-  
+
   // to indicate rectangle image data
   optional uint64 height = 50;
   optional uint64 width = 51;
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 6701eced60d068312a1a866a6312002f9f5207f7..b02af991dc577e070dac813cfd18d35ab6dfc3e0 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -830,7 +830,6 @@ class Pool(Cfg):
                  channels,
                  size_x,
                  size_y=None,
-                 img_width=None,
                  start=None,
                  stride=None,  # 1 by default in protobuf
                  stride_y=None,
@@ -1927,8 +1926,8 @@ class BatchNormLayer(LayerBase):
             image_conf = self.config.inputs[0].image_conf
             parse_image(self.inputs[0].image, input_layer.name, image_conf)
 
-            # Only pass the width and height of input to batch_norm layer
-            # when either of it is non-zero.
+            # Only pass the width and height of the input to the batch_norm
+            # layer when either of them is non-zero.
             if input_layer.width != 0 or input_layer.height != 0:
                 self.set_cnn_layer(name, image_conf.img_size_y,
                                    image_conf.img_size, image_conf.channels,
                                    False)
@@ -1968,6 +1967,18 @@ class ResizeLayer(LayerBase):
             'ResizeLayer must have one and only one input')
 
 
+@config_layer('rotate')
+class RotateLayer(LayerBase):
+    def __init__(self, name, inputs, height, width, device=None):
+        super(RotateLayer, self).__init__(
+            name, 'rotate', 0, inputs=inputs, device=device)
+        config_assert(
+            len(self.inputs) == 1,
+            'RotateLayer must have one and only one input')
+        self.set_layer_height_width(height, width)
+        self.set_layer_size(self.get_input_layer(0).size)
+
+
 @config_layer('blockexpand')
 class BlockExpandLayer(LayerBase):
     def __init__(self, name, inputs, **xargs):
diff --git a/python/paddle/trainer/recurrent_units.py b/python/paddle/trainer/recurrent_units.py
old mode 100644
new mode 100755
index ff7e8932dcad798815e8a4d8bc586fba335d5e7a..ef92107a1093d2ec2b2a41677e964fdaa60ac829
--- a/python/paddle/trainer/recurrent_units.py
+++ b/python/paddle/trainer/recurrent_units.py
@@ -15,10 +15,10 @@
 # recurrent_units.py
 # Version 2.0
 #
-# Some recurrent units can be used in recurrent layer group, 
+# Some recurrent units can be used in recurrent layer group,
 #   to use these units, import this module in your config_file:
-#     import trainer.recurrent_units 
-# 
+#     import trainer.recurrent_units
+#
 # The modules in this file are DEPRECATED.
 # If you would like to use lstm/gru
 # please use the functions defined in paddle.trainer_config_helpers.
@@ -29,7 +29,7 @@ from paddle.trainer.config_parser import *
 # long short term memory, can be used in recurrent machine
 # *inputs* must be a list of Projections, for example:
 #   inputs = [FullMatrixProjection("input_layer_name")],
-# *para_prefix* defines parameter names, if the *para_prefix* of 
+# *para_prefix* defines parameter names, if the *para_prefix* of
 #   two LstmRecurrentUnits is the same, they share the same parameters
 # *out_memory* can be defined outside if it's used outside
 def LstmRecurrentUnit(name,
@@ -197,7 +197,7 @@ def LstmRecurrentLayerGroup(name,
 # gated recurrent unit, can be used in recurrent machine
 # *inputs* should be a list of Projections, for example:
 #   inputs = [FullMatrixProjection("input_layer_name")],
-# *para_prefix* defines parameter names, if the *para_prefix* of 
+# *para_prefix* defines parameter names, if the *para_prefix* of
 #   two GatedRecurrentUnits is the same, they share the same parameters
 # *out_memory* can be defined outside if it's used outside
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
old mode 100644
new mode 100755
index 14035ce3fd8aabe6a6800fa1be95193f053c1bba..66fa58ac91e33bfeac37d1bfbdad8dab4789c4bd
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -70,6 +70,7 @@ __all__ = [
     'interpolation_layer',
     'bilinear_interp_layer',
     'trans_layer',
+    'rotate_layer',
     'sum_to_one_norm_layer',
     'get_output_layer',
     'LayerType',
@@ -154,6 +155,7 @@ class LayerType(object):
     POWER_LAYER = 'power'
     SCALING_LAYER = 'scaling'
     TRANS_LAYER = 'trans'
+    ROTATE_LAYER = 'rotate'
     OUT_PROD_LAYER = 'out_prod'
     FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'
@@ -1642,7 +1644,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
 @layer_support()
 def trans_layer(input, name=None, layer_attr=None):
     """
-    A layer for transposition.
+    A layer for transposing a minibatch matrix.
 
     .. math::
        y = x^\mathrm{T}
@@ -1673,6 +1675,54 @@ def trans_layer(input, name=None, layer_attr=None):
         name, LayerType.TRANS_LAYER, parents=[input], size=input.size)
 
 
+@wrap_name_default()
+@layer_support()
+def rotate_layer(input, height, width, name=None, layer_attr=None):
+    """
+    A layer for rotating each feature channel by 90 degrees (clockwise),
+    usually used when the input sample is an image or a feature map.
+
+    .. math::
+       y(j,i,:) = x(M-i-1,j,:)
+
+    where :math:`x` is the (M x N x C) input and :math:`y` is the (N x M x C) output.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       rot = rotate_layer(input=layer,
+                          height=100,
+                          width=100)
+
+    :param input: Input layer.
+    :type input: LayerOutput
+    :param height: The height of the sample matrix.
+    :type height: int
+    :param width: The width of the sample matrix.
+    :type width: int
+    :param name: Layer name.
+    :type name: basestring
+    :param layer_attr: extra layer attributes.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    l = Layer(
+        name=name,
+        height=height,
+        width=width,
+        type=LayerType.ROTATE_LAYER,
+        inputs=[input.name],
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.ROTATE_LAYER,
+        parents=[input],
+        size=l.config.size)
+
+
 @wrap_name_default()
 @layer_support()
 def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
@@ -1826,14 +1876,14 @@ def img_conv_layer(input,
                    trans=False,
                    layer_type=None):
     """
-    Convolution layer for image. Paddle can support both square and non-square 
+    Convolution layer for image. Paddle can support both square and non-square
     input currently.
 
     The details of convolution layer, please refer UFLDL's `convolution
    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
 
-    Convolution Transpose (deconv) layer for image. Paddle can support both square 
+    Convolution Transpose (deconv) layer for image. Paddle can support both square
     and non-square input currently.
 
     The details of convolution transpose layer,
@@ -1892,7 +1942,7 @@ def img_conv_layer(input,
     :param trans: true if it is a convTransLayer, false if it is a convLayer
     :type trans: bool
     :param layer_type: specify the layer_type, default is None. If trans=True,
-                       layer_type has to be "exconvt", otherwise layer_type 
+                       layer_type has to be "exconvt", otherwise layer_type
                        has to be either "exconv" or "cudnn_conv"
     :type layer_type: String
     :return: LayerOutput object.
@@ -3626,9 +3676,9 @@ def pad_layer(input,
     input data and 3 zeros after the input data in channel dimension.
     pad_h means padding zeros in height dimension. pad_w means padding zeros
     in width dimension.
-    
+
     For example,
-    
+
     .. code-block::
 
        input(2,2,2,3) = [
@@ -3637,7 +3687,7 @@ def pad_layer(input,
                           [ [[4,3,1], [1,8,7]],
                             [[3,8,9], [2,3,5]] ]
                         ]
-    
+
        pad_c=[1,1], pad_h=[0,0], pad_w=[0,0]
 
        output(2,4,2,3) = [
         [ [[0,0,0], [0,0,0]],
@@ -4746,6 +4796,7 @@ def cross_entropy_with_selfnorm(input,
                                 layer_attr=None):
     """
     A loss layer for multi class entropy with selfnorm.
+    Input should be a vector of positive numbers, without normalization.
 
     .. code-block:: python
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
old mode 100644
new mode 100755
index d0cbe3891f60f4b153b5bf659901b63dd178ae19..cadde11ff81658cb309cd1bf7a44bac6374c1e44
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -957,22 +957,22 @@ def simple_gru(input,
    use one complete layer to implement rnn (including simple rnn, gru and lstm)
    with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But,
    the multiplication operation :math:`W x_t` is not computed in these layers.
-    See details in their interfaces in layers.py. 
+    See details in their interfaces in layers.py.
    The other implementation is to use a recurrent group which can ensemble a
    series of layers to compute rnn step by step. This way is flexible for
    attention mechanisms or other complex connections.

    - gru_step_layer: only compute rnn by one step. It needs a memory as input
      and can be used in recurrent group.
-    - gru_unit: a wrapper of gru_step_layer with memory. 
+    - gru_unit: a wrapper of gru_step_layer with memory.
    - gru_group: a GRU cell implemented by a combination of multiple layers in
      recurrent group.
-      But :math:`W x_t` is not done in group. 
+      But :math:`W x_t` is not done in group.
    - gru_memory: a GRU cell implemented by one layer, which does same calculation
-      with gru_group and is faster than gru_group. 
-    - simple_gru: a complete GRU implementation inlcuding :math:`W x_t` and 
+      with gru_group and is faster than gru_group.
+    - simple_gru: a complete GRU implementation including :math:`W x_t` and
      gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`, see
-      formula in grumemory. 
+      formula in grumemory.

    The computational speed is that, grumemory is relatively better than
    gru_group, and gru_group is relatively better than simple_gru.
diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
index ae275735aa2b852b3b226a4a0e5b2d4d000ba199..e6cd35ee761d1acd0b5c1943554c7ea1de6a13f5 100644
--- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py
+++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
@@ -39,6 +39,7 @@ z1 = mixed_layer(
 assert z1.size > 0
 
 y2 = fc_layer(input=y, size=15)
+z2 = rotate_layer(input=y2, height=5, width=3)
 
 cos1 = cos_sim(a=x1, b=y1)
 cos3 = cos_sim(a=x1, b=y2, size=3)
@@ -46,7 +47,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
 linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
 
 out = fc_layer(
-    input=[cos1, cos3, linear_comb, z, z1],
+    input=[cos1, cos3, linear_comb, z, z1, z2],
     size=num_classes,
     act=SoftmaxActivation())