Commit 781b85b5 authored by Haonan

rotate_layer and flip_layer
* added getMin and getMax for GpuMatrix
* gru_step_layer parameter name

Parent c1f9cd9d
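The new rotate_layer and flip_layer are exercised in the test config at the end of this diff; below is a minimal usage sketch (layer names and sizes are illustrative only, not taken from the patch):

```python
# Minimal usage sketch (illustrative only): layer names and sizes are not
# taken from the patch.
from paddle.trainer_config_helpers import *

data = data_layer(name='sample', size=200)   # each sample is a 100 x 2 matrix

# rotate_layer: clock-wise rotation of each (height x width) sample matrix
rot = rotate_layer(input=data, height=100)

# flip_layer: flip about the matrix center, i.e. the rotation applied twice
flp = flip_layer(input=data, height=100)
```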
......@@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle {
/**
* A layer for transposition.
* A layer for transposing a minibatch matrix.
* \f[
y = x^\mathrm{T}
* \f]
......
......@@ -1316,6 +1316,21 @@ TEST(Layer, ResizeLayer) {
}
}
TEST(Layer, RotateLayer) {
TestConfig config;
config.biasSize = 0;
config.layerConfig.set_type("rotate");
const int INPUT_SIZE = 64; // height * width
config.layerConfig.set_size(INPUT_SIZE);
config.layerConfig.set_height(32);
config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config, "rotate", 100, false, useGpu);
}
}
TEST(Layer, NCELayer) {
TestConfig config;
size_t numClasses = 4;
......
......@@ -372,7 +372,7 @@ MatrixPtr CpuSparseMatrix::subMatrix(size_t startRow, size_t numRows) {
}
/* mem MUST be alloced outside (memAlloc=false) */
void CpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
void CpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
CHECK(!memAlloc);
CpuSparseMatrix* mat = dynamic_cast<CpuSparseMatrix*>(matTrans.get());
if (format_ == SPARSE_CSR) {
......
......@@ -201,7 +201,7 @@ public:
void zeroMem();
/// mem MUST be alloced outside (memAlloc=false)
void transpose(MatrixPtr matTrans, bool memAlloc);
void transpose(MatrixPtr& matTrans, bool memAlloc);
void mul(const Matrix& A, const Matrix& B, real alpha, real beta);
......
......@@ -274,6 +274,18 @@ real GpuMatrix::getSum() {
return sum;
}
real GpuMatrix::getMin() {
CHECK(isContiguous());
auto vec = GpuVector(height_ * width_, data_);
return vec.getMin();
}
real GpuMatrix::getMax() {
CHECK(isContiguous());
auto vec = GpuVector(height_ * width_, data_);
return vec.getMax();
}
void GpuMatrix::accumulateColSum(Matrix& src) {
CHECK_EQ(getWidth(), src.getWidth());
CHECK_EQ(getHeight(), (size_t)1);
......@@ -371,7 +383,7 @@ MatrixPtr GpuMatrix::getTranspose() {
}
}
void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
if (memAlloc) {
matTrans = std::make_shared<GpuMatrix>(width_, height_);
} else {
......@@ -385,13 +397,29 @@ void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc);
}
void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
if (memAlloc) {
matRot = std::make_shared<GpuMatrix>(width_, height_);
} else {
CHECK(matRot != NULL);
}
MatrixPtr cpuMat = std::make_shared<CpuMatrix>(height_, width_);
cpuMat->copyFrom(*this);
MatrixPtr cpuMatRot = std::make_shared<CpuMatrix>(width_, height_);
cpuMat->rotate(cpuMatRot, false, clockWise);
matRot->copyFrom(*cpuMatRot);
}
MatrixPtr GpuMatrix::getInverse() {
MatrixPtr matInv;
inverse(matInv, true);
return matInv;
}
void GpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) {
void GpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
CHECK_EQ(height_, width_);
if (memAlloc) {
......@@ -1690,7 +1718,7 @@ MatrixPtr CpuMatrix::getTranspose() {
}
}
void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
if (memAlloc) {
matTrans = std::make_shared<CpuMatrix>(width_, height_);
} else {
......@@ -1708,13 +1736,35 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
}
}
void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
if (memAlloc) {
matRot = std::make_shared<CpuMatrix>(width_, height_);
} else {
CHECK(matRot != NULL);
}
real* dataRot = matRot->getData();
real* data = getData();
int lda = getStride();
int ldc = matRot->getStride();
for (size_t i = 0; i < height_; i++) {
for (size_t j = 0; j < width_; j++) {
if (clockWise) {
dataRot[j * ldc + i] = data[(height_ - i - 1) * lda + j];
} else {
dataRot[j * ldc + i] = data[i * lda + (width_ - j - 1)];
}
}
}
}
MatrixPtr CpuMatrix::getInverse() {
MatrixPtr matInv;
inverse(matInv, true);
return matInv;
}
void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) {
void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
CHECK_EQ(height_, width_);
if (memAlloc) {
......
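The clock-wise mapping in CpuMatrix::rotate above, `dataRot[j * ldc + i] = data[(height_ - i - 1) * lda + j]`, can be sanity-checked with a small standalone sketch (plain Python, illustrative only, not part of the patch); it also confirms that applying the rotation twice flips the matrix about its center, which is what flip_layer relies on later in this diff:

```python
# Standalone sketch (not part of the patch): mirrors the index mapping in
# CpuMatrix::rotate for a small M x N matrix stored as nested lists.

def rotate(mat, clock_wise=True):
    """Rotate an M x N matrix by 90 degrees, returning an N x M matrix."""
    height, width = len(mat), len(mat[0])
    rot = [[0] * height for _ in range(width)]
    for i in range(height):
        for j in range(width):
            if clock_wise:
                # dataRot[j * ldc + i] = data[(height_ - i - 1) * lda + j]
                rot[j][i] = mat[height - i - 1][j]
            else:
                # dataRot[j * ldc + i] = data[i * lda + (width_ - j - 1)]
                rot[j][i] = mat[i][width - j - 1]
    return rot

x = [[1, 2, 3],
     [4, 5, 6]]           # 2 x 3 input
print(rotate(x))          # [[4, 1], [5, 2], [6, 3]]  -- clock-wise rotation
print(rotate(rotate(x)))  # [[6, 5, 4], [3, 2, 1]]    -- two rotations == center flip
```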
......@@ -372,7 +372,17 @@ public:
* allocate matTrans' memory outside, then set memAlloc as false;
* else set as true.
*/
virtual void transpose(MatrixPtr matTrans, bool memAlloc) {
virtual void transpose(MatrixPtr& matTrans, bool memAlloc) {
LOG(FATAL) << "Not implemented";
}
/**
* @brief rotate clock-wise.
*
* allocate matRot's memory outside, then set memAlloc as false;
* else set as true.
*/
virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) {
LOG(FATAL) << "Not implemented";
}
......@@ -387,7 +397,7 @@ public:
* if allocate matInv's memory outside, then set memAlloc as false;
* else set as true.
*/
virtual void inverse(MatrixPtr matInv, bool memAlloc) {
virtual void inverse(MatrixPtr& matInv, bool memAlloc) {
LOG(FATAL) << "Not implemented";
}
......@@ -1169,11 +1179,15 @@ public:
void accumulateColSum(Matrix& src);
real getAbsSum();
real getMin();
real getMax();
MatrixPtr getTranspose();
void transpose(MatrixPtr matTrans, bool memAlloc);
void transpose(MatrixPtr& matTrans, bool memAlloc);
void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
MatrixPtr getInverse();
void inverse(MatrixPtr matInv, bool memAlloc);
void inverse(MatrixPtr& matInv, bool memAlloc);
/// add b to each sample of this.
void addBias(Matrix& b, real scale);
......@@ -1485,10 +1499,11 @@ public:
real getAbsSum();
MatrixPtr getTranspose();
void transpose(MatrixPtr matTrans, bool memAlloc);
void transpose(MatrixPtr& matTrans, bool memAlloc);
void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise);
MatrixPtr getInverse();
void inverse(MatrixPtr matInv, bool memAlloc);
void inverse(MatrixPtr& matInv, bool memAlloc);
void copyFrom(const Matrix& src);
......
......@@ -497,7 +497,7 @@ void GpuSparseMatrix::setRow(size_t row,
SparseValueType GpuSparseMatrix::getValueType() const { return valueType_; }
void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
void GpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) {
CHECK_EQ(format_, SPARSE_CSC);
int nnz = sMatrix_->nnz;
if (memAlloc) {
......
......@@ -109,7 +109,7 @@ public:
MatrixPtr getTranspose();
/// B = A'
void transpose(MatrixPtr matTrans, bool memAlloc);
void transpose(MatrixPtr& matTrans, bool memAlloc);
void copyFrom(const Matrix& src);
void copyFrom(const Matrix& src, hl_stream_t stream);
......
......@@ -248,11 +248,13 @@ TEST(Matrix, SparseMatrixTranspose) {
/*dense matrix transpose*/
CpuMatrixPtr matC(new CpuMatrix(height, width));
matC->copyFrom(*matA);
CpuMatrixPtr matD(new CpuMatrix(width, height));
MatrixPtr matD(new CpuMatrix(width, height));
matC->transpose(matD, false);
/*check result*/
checkSMatrixEqual2Dense(
std::dynamic_pointer_cast<CpuSparseMatrix>(matB), matD);
std::dynamic_pointer_cast<CpuSparseMatrix>(matB),
std::dynamic_pointer_cast<CpuMatrix>(matD));
}
}
}
......
......@@ -105,6 +105,21 @@ void testMatrixGetSum(int height, int width) {
EXPECT_LE(fabs(cpuSum - gpuSum), err);
}
void testMatrixGetMinMax(int height, int width) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(height, width);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
real cpuMin = cpuInput->getMin();
real gpuMin = gpuInput->getMin();
real cpuMax = cpuInput->getMax();
real gpuMax = gpuInput->getMax();
EXPECT_EQ(cpuMin, gpuMin);
EXPECT_EQ(cpuMax, gpuMax);
}
void testMatrixZeroAtOffset(int height, int width) {
MatrixPtr cpuA = std::make_shared<CpuMatrix>(height, width);
MatrixPtr gpuA = std::make_shared<GpuMatrix>(height, width);
......@@ -181,7 +196,7 @@ void testMatrixInverse(int height) {
cpu->add(*outputCheck);
gpu->copyFrom(*cpu);
cpu->inverse(cpuI, false);
cpu->inverse(cpuI, true);
gpu->inverse(gpuI, false);
TensorCheckErr(*cpuI, *gpuI);
......
......@@ -427,14 +427,14 @@ message LayerConfig {
// bias size
optional uint32 bias_size = 48 [default = 0];
// this parameter can be used as a user-defined parameter when necessary,
// without changing the proto file.
// e.g., when a new layer with a user-defined parameter is implemented,
// it can be used to pass that parameter, without modifying the proto file.
// string type is used for flexibility: different types can be converted
// to string and reinterpreted in the user's own layer implementation.
optional string user_arg = 49;
// to indicate rectangle image data
optional uint64 height = 50;
optional uint64 width = 51;
......
......@@ -830,7 +830,6 @@ class Pool(Cfg):
channels,
size_x,
size_y=None,
img_width=None,
start=None,
stride=None, # 1 by default in protobuf
stride_y=None,
......@@ -1834,6 +1833,7 @@ class PoolLayer(LayerBase):
pool_conf.channels)
@config_layer('spp')
class SpatialPyramidPoolLayer(LayerBase):
def __init__(self, name, inputs, **xargs):
......@@ -1927,8 +1927,8 @@ class BatchNormLayer(LayerBase):
image_conf = self.config.inputs[0].image_conf
parse_image(self.inputs[0].image, input_layer.name, image_conf)
# Only pass the width and height of input to batch_norm layer
# when either of them is non-zero.
if input_layer.width != 0 or input_layer.height != 0:
self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
image_conf.channels, False)
......@@ -1968,6 +1968,18 @@ class ResizeLayer(LayerBase):
'ResizeLayer must have one and only one input')
@config_layer('rotate')
class RotateLayer(LayerBase):
def __init__(self, name, inputs, height, device=None):
super(RotateLayer, self).__init__(
name, 'rotate', 0, inputs=inputs, device=device)
config_assert(
len(self.inputs) == 1,
'RotateLayer must have one and only one input')
self.config.height = height
self.set_layer_size(self.get_input_layer(0).size)
@config_layer('blockexpand')
class BlockExpandLayer(LayerBase):
def __init__(self, name, inputs, **xargs):
......
......@@ -15,10 +15,10 @@
# recurrent_units.py
# Version 2.0
#
# Some recurrent units can be used in recurrent layer group,
# to use these units, import this module in your config_file:
# import trainer.recurrent_units
#
# The modules in this file are DEPRECATED.
# If you would like to use lstm/gru
# please use the functions defined in paddle.trainer_config_helpers.
......@@ -29,7 +29,7 @@ from paddle.trainer.config_parser import *
# long short term memory, can be used in recurrent machine
# *inputs* must be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names. If the *para_prefix* of
# two LstmRecurrentUnit is the same, they share the same parameters
# *out_memory* can be defined outside if it's used outside
def LstmRecurrentUnit(name,
......@@ -197,7 +197,7 @@ def LstmRecurrentLayerGroup(name,
# gated recurrent unit, can be used in recurrent machine
# *inputs* should be a list of Projections, for example:
# inputs = [FullMatrixProjection("input_layer_name")],
# *para_prefix* defines parameter names. If the *para_prefix* of
# two GatedRecurrentUnit is the same, they share the same parameters
# *out_memory* can be defined outside if it's used outside
......
......@@ -70,6 +70,8 @@ __all__ = [
'interpolation_layer',
'bilinear_interp_layer',
'trans_layer',
'rotate_layer',
'flip_layer',
'sum_to_one_norm_layer',
'get_output_layer',
'LayerType',
......@@ -154,6 +156,7 @@ class LayerType(object):
POWER_LAYER = 'power'
SCALING_LAYER = 'scaling'
TRANS_LAYER = 'trans'
ROTATE_LAYER = 'rotate'
OUT_PROD_LAYER = 'out_prod'
FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'
......@@ -1642,7 +1645,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
@layer_support()
def trans_layer(input, name=None, layer_attr=None):
"""
A layer for transposition.
A layer for transposing a minibatch matrix.
.. math::
y = x^\mathrm{T}
......@@ -1673,6 +1676,87 @@ def trans_layer(input, name=None, layer_attr=None):
name, LayerType.TRANS_LAYER, parents=[input], size=input.size)
@wrap_name_default()
@layer_support()
def rotate_layer(input, height, name=None, layer_attr=None):
"""
A layer for 90-degree clock-wise rotation, usually used when the input
sample is an image or a map.
.. math::
y(j,i) = x(M-i-1,j)
where :math:`x` is (M x N) input, and :math:`y` is (N x M) output.
The example usage is:
.. code-block:: python
rot = rotate_layer(input=layer,
height=100)
:param input: Input layer.
:type input: LayerOutput
:param height: The height of the sample matrix
:type height: int
:param name: Layer name.
:type name: basestring
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput)
l = Layer(name=name,
height=height,
type=LayerType.ROTATE_LAYER,
inputs=[input.name],
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name=name,
layer_type=LayerType.ROTATE_LAYER,
parents=[input],
size=l.config.size)
@wrap_name_default()
@layer_support()
def flip_layer(input, height, name=None, layer_attr=None):
"""
A layer for flipping the matrix about its center, which is equivalent to
applying the clock-wise rotation twice. Usually used when the input sample
is an image or a map.
.. math::
y(i,j) = x(M-i-1, N-j-1)
where :math:`x` is (M x N) input, and :math:`y` is (M x N) output.
The example usage is:
.. code-block:: python
flip = flip_layer(input=layer,
height=100)
:param input: Input layer.
:type input: LayerOutput
:param height: The height of the sample matrix
:type height: int
:param name: Layer name.
:type name: basestring
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput)
return rotate_layer(input=rotate_layer(input=input,
height=height),
height=height,
name=name,
layer_attr=layer_attr)
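A quick sanity check that composing the clock-wise rotation with itself yields the flip formula above: writing the rotation of an :math:`M \times N` input as :math:`r(x)(j,i) = x(M-i-1, j)`,

.. math::

    (r \circ r)(x)(i,j) = r(x)(N-j-1, i) = x(M-i-1, N-j-1)

which is exactly the mapping computed by flip_layer.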
@wrap_name_default()
@layer_support()
def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
......@@ -1826,14 +1910,14 @@ def img_conv_layer(input,
trans=False,
layer_type=None):
"""
Convolution layer for image. Paddle can support both square and non-square
input currently.
For details of the convolution layer, please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/
FeatureExtractionUsingConvolution/>`_ .
Convolution Transpose (deconv) layer for image. Paddle can support both square
and non-square input currently.
The details of convolution transpose layer,
......@@ -1892,7 +1976,7 @@ def img_conv_layer(input,
:param trans: true if it is a convTransLayer, false if it is a convLayer
:type trans: bool
:param layer_type: specify the layer_type, default is None. If trans=True,
layer_type has to be "exconvt", otherwise layer_type
has to be either "exconv" or "cudnn_conv"
:type layer_type: String
:return: LayerOutput object.
......@@ -3619,9 +3703,9 @@ def pad_layer(input,
input data and 3 zeros after the input data in channel dimension.
pad_h means padding zeros in height dimension. pad_w means padding zeros
in width dimension.
For example,
.. code-block::
input(2,2,2,3) = [
......@@ -3630,7 +3714,7 @@ def pad_layer(input,
[ [[4,3,1], [1,8,7]],
[[3,8,9], [2,3,5]] ]
]
pad_c=[1,1], pad_h=[0,0], pad_w=[0,0]
output(2,4,2,3) = [
[ [[0,0,0], [0,0,0]],
......@@ -4739,6 +4823,7 @@ def cross_entropy_with_selfnorm(input,
layer_attr=None):
"""
A loss layer for multi-class cross entropy with self-normalization.
Input should be a vector of positive numbers, without normalization.
.. code-block:: python
......
......@@ -957,22 +957,22 @@ def simple_gru(input,
use one complete layer to implement rnn (including simple rnn, gru and lstm)
with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But,
the multiplication operation :math:`W x_t` is not computed in these layers.
See details in their interfaces in layers.py.
The other implementation is to use a recurrent group, which can assemble a
series of layers to compute the rnn step by step. This way is flexible for
attention mechanisms or other complex connections.
- gru_step_layer: only computes the rnn for one step. It needs a memory as
input and can be used in a recurrent group.
- gru_unit: a wrapper of gru_step_layer with memory.
- gru_group: a GRU cell implemented by a combination of multiple layers in
recurrent group.
But :math:`W x_t` is not done in group.
- gru_memory: a GRU cell implemented by one layer, which does same calculation
with gru_group and is faster than gru_group.
- simple_gru: a complete GRU implementation including :math:`W x_t` and
gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`, see
formula in grumemory.
In terms of computational speed, grumemory is relatively better than
gru_group, and gru_group is relatively better than simple_gru; see the
usage sketch after this hunk.
......
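A hedged usage sketch of the trade-off described above; keyword arguments other than `input` and `size` are assumptions and may differ between versions:

```python
# Hedged sketch: simple_gru computes W x_t internally, while grumemory needs
# the projection computed outside; argument names beyond input/size are
# assumptions.
from paddle.trainer_config_helpers import *

emb = data_layer(name='word_vec', size=128)

# simple_gru: complete GRU, including the W x_t multiplication.
gru_a = simple_gru(input=emb, size=256)

# grumemory: faster single-layer GRU cell; W x_t (of size 3 * hidden) must be
# computed outside, e.g. with an fc_layer or mixed_layer.
proj = fc_layer(input=emb, size=256 * 3, act=LinearActivation(), bias_attr=False)
gru_b = grumemory(input=proj, size=256)
```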
......@@ -39,6 +39,10 @@ z1 = mixed_layer(
assert z1.size > 0
y2 = fc_layer(input=y, size=15)
z2 = rotate_layer(input=y2,
height=5)
z3 = flip_layer(input=y2,
height=3)
cos1 = cos_sim(a=x1, b=y1)
cos3 = cos_sim(a=x1, b=y2, size=3)
......@@ -46,7 +50,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
out = fc_layer(
input=[cos1, cos3, linear_comb, z, z1],
input=[cos1, cos3, linear_comb, z, z1, z2, z3],
size=num_classes,
act=SoftmaxActivation())
......