diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 0b999c557c6159c103fa1873598193fde7eb82f3..f53541adc1be1dcaafbfc62f875c9f58e63d15c6 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -187,6 +187,15 @@ MatrixPtr Matrix::subMatrix(size_t startRow, size_t endRow, size_t startCol,
                         trans_, useGpu_);
 }
 
+void Matrix::setDiag(real value) {
+  CHECK(data_ != NULL);
+  CHECK_EQ(height_, width_);
+
+  zeroMem();
+  BaseMatrix diag(height_, 1, stride_ + 1, data_, false, useGpu_);
+  diag.assign(value);
+}
+
 GpuMatrix::GpuMatrix(size_t height, size_t width, bool trans)
     : Matrix(std::make_shared<GpuMemoryHandle>(height * width * sizeof(real)),
              height, width, trans, true) {}
@@ -202,6 +211,7 @@ void GpuMatrix::resetOne() {
   CHECK(data_ != NULL);
   one();
 }
+
 void GpuMatrix::resize(size_t newHeight, size_t newWidth) {
   size_t newSize = newHeight * newWidth;
   if (NULL == memoryHandle_.get() ||
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 293d13f4d6d5af0883ea76fb64ca5d9173efd4e0..120957f45d0c93656a3f9e87ed59410513632e25 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -195,6 +195,8 @@ public:
   virtual void resetOne() { LOG(FATAL) << "Not implemented"; }
 
+  void setDiag(real value);
+
   virtual void copyFrom(const Matrix& src) { LOG(FATAL) << "Not implemented"; }
 
   virtual void trimFrom(const CpuSparseMatrix& src) {
@@ -330,6 +332,7 @@ public:
 
   virtual MatrixPtr getInverse() {
     LOG(FATAL) << "Not implemented";
+    return nullptr;
   }
 
   /**
@@ -1016,6 +1019,7 @@ public:
 
   void zeroMem();
   void resetOne();
+  void setDiag(real value);
 
   void resize(size_t newHeight, size_t newWidth);
   void resize(size_t newHeight, size_t newWidth,
@@ -1280,6 +1284,8 @@ public:
 
   void zeroMem();
   void resetOne();
+  void setDiag(real value);
+
   void resize(size_t newHeight, size_t newWidth);
   void resize(size_t newHeight, size_t newWidth,
               size_t newNnz, /* used to allocate space */
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index b887cccaaa14e6c3761d151f31a859de66cf8fac..91a68006325f65516bc67a29c2ad4f0762b8f96d 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -647,20 +647,23 @@ void testMatrixInverse(int height) {
   MatrixPtr cpuI = std::make_shared<CpuMatrix>(height, height);
   MatrixPtr gpuI = std::make_shared<GpuMatrix>(height, height);
 
+  /* Make matrix well conditioned: cpu * cpuT + Identity */
   cpu->randomizeUniform();
+  MatrixPtr cpuT = cpu->getTranspose();
+  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, height);
+  outputCheck->mul(cpu, cpuT);
+  cpu->setDiag(1.0);
+  cpu->add(*outputCheck);
+
   gpu->copyFrom(*cpu);
   cpu->inverse(cpuI, false);
   gpu->inverse(gpuI, false);
 
-  MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, height);
   outputCheck->copyFrom(*gpuI);
   MatrixCheckErr(*cpuI, *outputCheck);
 
   outputCheck->mul(cpu, cpuI);
-  cpu->zeroMem();
-  for (int i = 0; i < height; i++) {
-    cpu->getRowBuf(i)[i] = 1.0;
-  }
+  cpu->setDiag(1.0);
   MatrixCheckErr(*cpu, *outputCheck);
 }
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1459c9a84a56f29c986f29ed1c6b96661c11ee67..fec4c4f9f8d8de8f8cc4bbc70220fdcad6de5f87 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -592,7 +592,7 @@ class MixedLayerType(LayerOutput):
     def __exit__(self, *args, **kwargs):
         del args, kwargs  # unused parameter to suppress warning
         assert len(self.inputs) != 0
-        MixedLayer(
+        ml = MixedLayer(
            name=self.name,
            size=self.size,
            active_type=self.activation.name,
@@ -600,6 +600,9 @@ class MixedLayerType(LayerOutput):
            inputs=self.inputs,
            **ExtraLayerAttribute.to_kwargs(self.layer_attr)
         )
+        # update the size which might be computed inside MixedLayer
+        # according to the operator's output size
+        self.size = ml.config.size
 
 
 @wrap_name_default("mixed")
@@ -2104,7 +2107,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
     if layer_type == LayerType.CONCAT_LAYER:
         assert not bias_attr
-    
+
     Layer(
         name=name,
         type=layer_type,
         inputs=[x.name for x in input] if is_concat_layer else input,
@@ -2682,7 +2685,7 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
     assert isinstance(input1, LayerOutput)
     assert isinstance(input2, LayerOutput)
     Layer(name=name,
-          type="out_prod",
+          type=LayerType.OUT_PROD_LAYER,
           inputs=[input1.name, input2.name],
           **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(name=name,
@@ -2849,7 +2852,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
 
 def __cost_input__(input, label, weight=None):
     """
-    inputs and parents for cost layers. 
+    inputs and parents for cost layers.
     """
     ipts = [Input(input.name), Input(label.name)]
     parents = [input, label]
@@ -2858,7 +2861,7 @@ def __cost_input__(input, label, weight=None):
         ipts.append(Input(weight.name))
         parents.append(weight)
     return ipts, parents
-    
+
 
 @wrap_name_default()
 @layer_support()
@@ -2943,7 +2946,7 @@ def classification_cost(input, label, weight=None, name=None,
 
 
 def conv_operator(img, filter, filter_size, num_filters,
-                  num_channel=None, stride=1, padding=0,
+                  num_channels=None, stride=1, padding=0,
                   filter_size_y=None, stride_y=None, padding_y=None):
     """
     Different from img_conv_layer, conv_op is an Operator, which can be used
@@ -2973,8 +2976,8 @@ def conv_operator(img, filter, filter_size, num_filters,
     :type filter_size_y: int
     :param num_filters: channel of output data.
     :type num_filters: int
-    :param num_channel: channel of input data.
-    :type num_channel: int
+    :param num_channels: channel of input data.
+    :type num_channels: int
     :param stride: The x dimension of the stride.
     :type stride: int
     :param stride_y: The y dimension of the stride.
@@ -2993,19 +2996,19 @@ def conv_operator(img, filter, filter_size, num_filters,
     if padding_y is None:
         padding_y = padding
-    if num_channel is None:
-        num_channel = img.num_filters
+    if num_channels is None:
+        num_channels = img.num_filters
 
     assert isinstance(filter, LayerOutput)
     if filter.size is not None:
-        filter.size = filter_size * filter_size_y * num_filters * num_channel
+        filter.size = filter_size * filter_size_y * num_filters * num_channels
 
     op = ConvOperator(input_layer_names=[img.name, filter.name],
                       num_filters=num_filters,
                       conv_conf=Conv(filter_size=filter_size,
                                      padding=padding,
                                      stride=stride,
-                                     channels=num_channel,
+                                     channels=num_channels,
                                      filter_size_y=filter_size_y,
                                      padding_y=padding_y,
                                      stride_y=stride_y,
@@ -3045,8 +3048,8 @@ def conv_projection(input, filter_size, num_filters,
     :type filter_size_y: int
     :param num_filters: channel of output data.
     :type num_filters: int
-    :param num_channel: channel of input data.
-    :type num_channel: int
+    :param num_channels: channel of input data.
+    :type num_channels: int
     :param stride: The x dimension of the stride.
     :type stride: int
     :param stride_y: The y dimension of the stride.
@@ -3537,15 +3540,15 @@ def maxout_layer(input,
     - Input: output of a conv layer.
     - Output: feature map size same as input. Channel is (input channel) / groups.
-    So groups should be larger than 1, and the num of channels should be able 
+    So groups should be larger than 1, and the num of channels should be able
     to devided by groups.
-    Please refer to Paper: 
+    Please refer to Paper:
     - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
     - Multi-digit Number Recognition from Street View \
       Imagery using Deep Convolutional Neural Networks: \
       https://arxiv.org/pdf/1312.6082v4.pdf
-    
+
     The simple usage is:
 
     .. code-block:: python
@@ -3790,9 +3793,9 @@ def nce_layer(input, label, num_classes, weight=None,
     :param weight: weight layer, can be None(default)
     :type weight: LayerOutput
     :param num_classes: number of classes.
-    :type num_classes: int 
+    :type num_classes: int
     :param num_neg_samples: number of negative samples. Default is 10.
-    :type num_neg_samples: int 
+    :type num_neg_samples: int
     :param neg_distribution: The distribution for generating the random negative labels.
                              A uniform distribution will be used if not provided.
                              If not None, its length must be equal to num_classes.
@@ -3813,7 +3816,7 @@ def nce_layer(input, label, num_classes, weight=None,
         assert isinstance(neg_distribution, collections.Sequence)
         assert len(neg_distribution) == num_classes
         assert sum(neg_distribution) == 1
-    
+
     ipts_for_layer = []
     parents = []
     for each_input in input:
diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py
index 4066c5bc6e0f06e43b1c4d13020c092babdaea91..51194b5a2a8ae692ba21aba8d1d565d2adc7c20b 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/projections.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/projections.py
@@ -35,7 +35,7 @@ flt = data_layer(name='filter', size=3*3*1*64)
 
 with mixed_layer() as m7:
     m7 += conv_operator(img=img, filter=flt, num_filters=64,
-                        num_channel=1, filter_size=3)
+                        num_channels=1, filter_size=3)
 
 end = mixed_layer(input=[full_matrix_projection(input=m5),
                          trans_full_matrix_projection(input=m6),
diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
index faaab9107d8fbf11afafb722075acbe986efe9fd..26be84f122180cdc2b6a46df5480a9184fa41d2c 100644
--- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py
+++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
@@ -29,9 +29,11 @@ z1 = mixed_layer(act=LinearActivation(),
                                filter=y1,
                                filter_size=1,
                                num_filters=5,
-                               num_channel=5,
+                               num_channels=5,
                                stride=1)])
 
+assert z1.size > 0
+
 y2 = fc_layer(input=y, size=15)
 
 cos1 = cos_sim(a=x1, b=y1)
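
Note on the Matrix::setDiag implementation above: it zeroes the buffer, then builds a height x 1 BaseMatrix view over the same data whose row stride is stride_ + 1, so element i of the view lands on entry (i, i) and a single assign() fills the whole diagonal. A minimal NumPy sketch of the same stride trick (illustration only, not the Paddle API; set_diag is a hypothetical helper):

    import numpy as np

    def set_diag(mat, value):
        """Zero mat, then write value on its diagonal via a strided view."""
        height, width = mat.shape
        assert height == width, "setDiag requires a square matrix"
        mat[...] = 0.0               # mirrors zeroMem()
        flat = mat.reshape(-1)       # assumes row-major contiguous storage
        # Stepping by width + 1 through the flat buffer visits (0,0), (1,1), ...
        flat[::width + 1] = value    # mirrors diag.assign(value)

    m = np.empty((4, 4))
    set_diag(m, 1.0)
    assert np.array_equal(m, np.eye(4))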
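
The testMatrixInverse change conditions the matrix before inverting it: for random A, B = A * A^T + I is symmetric positive definite (every eigenvalue of A * A^T is >= 0, so every eigenvalue of B is >= 1), which guarantees B is invertible and keeps the CPU and GPU inverses stable enough to compare. A plain NumPy sketch of the same check (illustrative only, not the Paddle test harness):

    import numpy as np

    height = 32
    A = np.random.uniform(size=(height, height))  # cpu->randomizeUniform()
    B = A @ A.T + np.eye(height)                  # mul(cpu, cpuT); setDiag(1.0); add(...)

    B_inv = np.linalg.inv(B)
    # B @ B_inv should reproduce the identity up to floating-point error,
    # mirroring the final MatrixCheckErr(*cpu, *outputCheck) comparison.
    assert np.allclose(B @ B_inv, np.eye(height))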