提交 2558c3f1 编写于 作者: H Haonan

revisions according to reviews

上级 b4c1d175
......@@ -267,4 +267,16 @@ extern void hl_matrix_collect_shared_bias(real* B_d,
const int dimN,
real scale);
/**
 * @brief Rotate a matrix by 90 degrees.
 *
 * Writes a rotated copy of the (dimM x dimN) input into the
 * (dimN x dimM) output. Both pointers are passed straight to a CUDA
 * kernel, so they must reference device memory.
 *
 * @param[in]  mat       input matrix (dimM x dimN).
 * @param[out] matRot    output matrix (dimN x dimM).
 * @param[in]  dimM      input matrix height.
 * @param[in]  dimN      input matrix width.
 * @param[in]  clockWise true rotates clockwise, false counter-clockwise.
 */
extern void hl_matrix_rotate(
real* mat, real* matRot, int dimM, int dimN, bool clockWise);
#endif /* HL_MATRIX_H_ */
......@@ -106,4 +106,8 @@ inline void hl_matrix_collect_shared_bias(real* B_d,
const int dimM,
const int dimN,
real scale) {}
/// CPU-only stub for hl_matrix_rotate: a no-op when CUDA is unavailable.
/// NOTE: this must be a definition with an empty body, not a bare
/// declaration ending in ';' — an odr-used inline function with no
/// definition fails to link. All sibling stubs in this header use `{}`.
inline void hl_matrix_rotate(
    real* mat, real* matRot, int dimM, int dimN, bool clockWise) {}
#endif // HL_MATRIX_STUB_H_
......@@ -840,3 +840,28 @@ void hl_matrix_collect_shared_bias(real* B_d,
(B_d, A_d, channel, dimM, dimN, dim, limit, scale);
CHECK_SYNC("hl_matrix_collect_shared_bias failed");
}
/**
 * Kernel: rotate a (dimM x dimN) matrix by 90 degrees into matRot
 * (dimN x dimM). One thread handles one output element.
 *
 * clockwise:          matRot(col, row) = mat(dimM - row - 1, col)
 * counter-clockwise:  matRot(col, row) = mat(row, dimN - col - 1)
 */
__global__ void keMatrixRotate(real* mat, real* matRot,
                               int dimM, int dimN, bool clockWise) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid >= dimM * dimN) return;  // guard the tail of the last block

  const int row = tid / dimN;
  const int col = tid % dimN;
  const int srcIdx = clockWise ? (dimM - row - 1) * dimN + col
                               : row * dimN + (dimN - col - 1);
  matRot[col * dimM + row] = mat[srcIdx];
}
/**
 * Rotate mat (dimM x dimN) by 90 degrees into matRot (dimN x dimM)
 * on the GPU. clockWise selects the rotation direction.
 */
void hl_matrix_rotate(real* mat, real* matRot,
                      int dimM, int dimN, bool clockWise) {
  CHECK_NOTNULL(mat);
  CHECK_NOTNULL(matRot);

  // One thread per element; round the grid up to cover all of them.
  const int numElements = dimM * dimN;
  const int threadsPerBlock = 512;
  const int numBlocks = DIVUP(numElements, threadsPerBlock);

  keMatrixRotate<<<numBlocks, threadsPerBlock, 0, STREAM_DEFAULT>>>(
      mat, matRot, dimM, dimN, clockWise);
  CHECK_SYNC("hl_matrix_rotate failed");
}
......@@ -23,7 +23,8 @@ bool RotateLayer::init(const LayerMap& layerMap,
Layer::init(layerMap, parameterMap);
CHECK_EQ(inputLayers_.size(), 1UL);
sampleHeight_ = config_.height();
height_ = config_.height();
width_ = config_.width();
return true;
}
......@@ -32,26 +33,31 @@ void RotateLayer::forward(PassType passType) {
MatrixPtr input = getInputValue(0);
batchSize_ = input->getHeight();
sampleSize_ = input->getWidth();
sampleWidth_ = sampleSize_ / sampleHeight_;
CHECK_EQ(sampleSize_ % sampleHeight_, 0);
size_ = input->getWidth();
CHECK_GE(size_, height_ * width_);
CHECK_EQ(size_ % (height_ * width_), 0)
<< "The input's depth should be an int";
channels_ = size_ / (height_ * width_);
resizeOutput(batchSize_, sampleSize_);
resizeOutput(batchSize_, size_);
MatrixPtr outV = getOutputValue();
for (int b = 0; b < batchSize_; b++) {
MatrixPtr inputSample = Matrix::create(input->getData() + b * sampleSize_,
sampleHeight_,
sampleWidth_,
false,
useGpu_);
MatrixPtr outputSample = Matrix::create(outV->getData() + b * sampleSize_,
sampleWidth_,
sampleHeight_,
false,
useGpu_);
inputSample->rotate(outputSample, false, true);
for (int b = 0; b < batchSize_; b++) { // for each input feat map
for (int c = 0; c < channels_; c++) { // for each feat channel
MatrixPtr inputSample =
Matrix::create(input->getData() + b * size_ + c * height_ * width_,
height_,
width_,
false,
useGpu_);
MatrixPtr outputSample =
Matrix::create(outV->getData() + b * size_ + c * height_ * width_,
width_,
height_,
false,
useGpu_);
inputSample->rotate(outputSample, false, true /* clock-wise */);
}
}
if (getInputGrad(0)) {
......@@ -69,23 +75,24 @@ void RotateLayer::backward(const UpdateCallback& callback) {
// the grad should be rotated in the reverse direction
MatrixPtr preGrad = getInputGrad(0);
for (int b = 0; b < batchSize_; b++) {
MatrixPtr inputSampleGrad =
Matrix::create(preGrad->getData() + b * sampleSize_,
sampleHeight_,
sampleWidth_,
false,
useGpu_);
MatrixPtr outputSampleGrad =
Matrix::create(outputGrad->getData() + b * sampleSize_,
sampleWidth_,
sampleHeight_,
false,
useGpu_);
MatrixPtr tmpGrad =
Matrix::create(sampleHeight_, sampleWidth_, false, useGpu_);
outputSampleGrad->rotate(tmpGrad, false, false);
inputSampleGrad->add(*tmpGrad);
for (int b = 0; b < batchSize_; b++) { // for each input feat map
for (int c = 0; c < channels_; c++) { // for each feat channel
MatrixPtr inputSampleGrad =
Matrix::create(preGrad->getData() + b * size_ + c * height_ * width_,
height_,
width_,
false,
useGpu_);
MatrixPtr outputSampleGrad = Matrix::create(
outputGrad->getData() + b * size_ + c * height_ * width_,
width_,
height_,
false,
useGpu_);
MatrixPtr tmpGrad = nullptr;
outputSampleGrad->rotate(tmpGrad, true, false /* anti clock-wise */);
inputSampleGrad->add(*tmpGrad);
}
}
}
......
......@@ -19,12 +19,13 @@ limitations under the License. */
namespace paddle {
/**
* A layer for rotating an input sample (assume it's a matrix)
* The rotation is in clock-wise
* A layer for rotating a multi-channel feature map (M x N x C) in the spatial
* domain
* The rotation is 90 degrees in clock-wise
* \f[
* y(j,i) = x(M-i-1,j)
* y(j,i,:) = x(M-i-1,j,:)
* \f]
* where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output.
* where \f$x\f$ is (M x N x C) input, and \f$y\f$ is (N x M x C) output.
*
* The config file api is rotate_layer
*
......@@ -41,9 +42,10 @@ public:
private:
int batchSize_;
int sampleSize_;
int sampleHeight_;
int sampleWidth_;
int size_;
int height_;
int width_;
int channels_;
};
} // namespace paddle
......@@ -1320,9 +1320,12 @@ TEST(Layer, RotateLayer) {
TestConfig config;
config.biasSize = 0;
config.layerConfig.set_type("rotate");
const int INPUT_SIZE = 64; // height * width
const int INPUT_SIZE = 64; // height * width * depth
const int HEIGHT = 8;
const int WIDTH = 4;
config.layerConfig.set_size(INPUT_SIZE);
config.layerConfig.set_height(32);
config.layerConfig.set_height(HEIGHT);
config.layerConfig.set_width(WIDTH);
config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0});
config.layerConfig.add_inputs();
......
......@@ -388,6 +388,8 @@ void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
matTrans = std::make_shared<GpuMatrix>(width_, height_);
} else {
CHECK(matTrans != NULL);
CHECK_EQ(matTrans->getHeight(), width_);
CHECK_EQ(matTrans->getWidth(), height_);
}
real* dataTrans = matTrans->getData();
real* data = getData();
......@@ -402,15 +404,13 @@ void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
matRot = std::make_shared<GpuMatrix>(width_, height_);
} else {
CHECK(matRot != NULL);
CHECK_EQ(matRot->getHeight(), width_);
CHECK_EQ(matRot->getWidth(), height_);
}
MatrixPtr cpuMat = std::make_shared<CpuMatrix>(height_, width_);
cpuMat->copyFrom(*this);
MatrixPtr cpuMatRot = std::make_shared<CpuMatrix>(width_, height_);
cpuMat->rotate(cpuMatRot, false, clockWise);
matRot->copyFrom(*cpuMatRot);
real* dataRot = matRot->getData();
real* data = getData();
hl_matrix_rotate(data, dataRot, height_, width_, clockWise);
}
MatrixPtr GpuMatrix::getInverse() {
......@@ -1723,6 +1723,8 @@ void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
matTrans = std::make_shared<CpuMatrix>(width_, height_);
} else {
CHECK(matTrans != NULL);
CHECK_EQ(matTrans->getHeight(), width_);
CHECK_EQ(matTrans->getWidth(), height_);
}
real* dataTrans = matTrans->getData();
real* data = getData();
......@@ -1741,18 +1743,18 @@ void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
matRot = std::make_shared<CpuMatrix>(width_, height_);
} else {
CHECK(matRot != NULL);
CHECK_EQ(matRot->getHeight(), width_);
CHECK_EQ(matRot->getWidth(), height_);
}
real* dataRot = matRot->getData();
real* data = getData();
int lda = getStride();
int ldc = matRot->getStride();
for (size_t i = 0; i < height_; i++) {
for (size_t j = 0; j < width_; j++) {
if (clockWise) {
dataRot[j * ldc + i] = data[(height_ - i - 1) * lda + j];
dataRot[j * height_ + i] = data[(height_ - i - 1) * width_ + j];
} else {
dataRot[j * ldc + i] = data[i * lda + (width_ - j - 1)];
dataRot[j * height_ + i] = data[i * width_ + (width_ - j - 1)];
}
}
}
......
......@@ -377,9 +377,19 @@ public:
}
/**
* @brief rotate clock-wise.
* @brief rotate 90 degrees in clock-wise if clockWise=true;
* otherwise rotate in anti clock-wise
* clock-wise:
* \f[
* y(j,i) = x(M-i-1,j)
* \f]
* anti clock-wise:
* \f[
* y(j,i) = x(i, N-1-j)
* \f]
* where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output.
*
* allocate matTrans' memory outside, then set memAlloc as false;
* allocate matRot' memory outside, then set memAlloc as false;
* else set as true.
*/
virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
......
......@@ -176,11 +176,29 @@ void testMatrixTranspose(int height, int width) {
cpu->randomizeUniform();
gpu->copyFrom(*cpu);
cpu->transpose(cpuT, false);
gpu->transpose(gpuT, false);
gpu->transpose(gpuT, true);
TensorCheckEqual(*cpuT, *gpuT);
}
void testMatrixRotate(int height, int width) {
MatrixPtr cpu = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpu = std::make_shared<GpuMatrix>(height, width);
MatrixPtr cpuR = std::make_shared<CpuMatrix>(width, height);
MatrixPtr gpuR = std::make_shared<GpuMatrix>(width, height);
cpu->randomizeUniform();
gpu->copyFrom(*cpu);
cpu->rotate(cpuR, false, true);
gpu->rotate(gpuR, true, true);
TensorCheckEqual(*cpuR, *gpuR);
cpu->rotate(cpuR, true, false);
gpu->rotate(gpuR, false, false);
TensorCheckEqual(*cpuR, *gpuR);
}
void testMatrixInverse(int height) {
MatrixPtr cpu = std::make_shared<CpuMatrix>(height, height);
MatrixPtr gpu = std::make_shared<GpuMatrix>(height, height);
......@@ -215,6 +233,7 @@ TEST(Matrix, unary) {
testMatrixZeroAtOffset(height, width);
testMatrixGetSum(height, width);
testMatrixTranspose(height, width);
testMatrixRotate(height, width);
}
// inverse
testMatrixInverse(height);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册