diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst
index 8d297b0cf23acc691718890a47da38f4eff00d0c..4a02af396993207d305be488c993ce94cf20fe1d 100644
--- a/doc/ui/api/trainer_config_helpers/layers.rst
+++ b/doc/ui/api/trainer_config_helpers/layers.rst
@@ -191,6 +191,12 @@ embedding_layer
    :members: embedding_layer
    :noindex:
 
+scaling_projection
+------------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: scaling_projection
+    :noindex:
+
 dotmul_projection
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 2f85dd3c3b69d21cffede49b001298c6629900a6..3c2df52fed4f86675ce8f1ead6a3b66e4babde34 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -605,7 +605,7 @@ public:
     int batchSize = input->getHeight();
     int size = 1;
     resizeOutput(batchSize, size);
-    output_.value->sumRows(*input);
+    output_.value->sumRows(*input, /* scaleSum= */1, /* scaleDest= */0);
   }
 
   virtual void backward(const UpdateCallback& callback = nullptr) {
diff --git a/paddle/gserver/layers/FullMatrixProjection.cpp b/paddle/gserver/layers/FullMatrixProjection.cpp
index 8241cbd37ec623622f19ff2ba35c21a4e3e3533a..f17c1b05bd892c7d933e4910887f977ac5cda79b 100644
--- a/paddle/gserver/layers/FullMatrixProjection.cpp
+++ b/paddle/gserver/layers/FullMatrixProjection.cpp
@@ -52,7 +52,9 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) {
   }
 
   hl_set_sync_flag(syncFlag);
-  parameter_->incUpdate(callback);
+  if (weight_->getWGrad()) {
+    parameter_->incUpdate(callback);
+  }
 }
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/ScalingProjection.cpp b/paddle/gserver/layers/ScalingProjection.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c0a7072c6a7cc1d37723f43d1068483779f56437
--- /dev/null
+++ b/paddle/gserver/layers/ScalingProjection.cpp
@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Projection.h"
+
+namespace paddle {
+
+class ScalingProjection : public Projection {
+public:
+  ScalingProjection(const ProjectionConfig& config,
+                    const ParameterPtr& parameter, bool useGpu)
+      : Projection(config, parameter, useGpu) {
+    CHECK_EQ(parameter->getSize(), 1UL);
+    weight_.reset(new Weight(1, 1, parameter));
+  }
+
+  void forward() {
+    CHECK(in_->value);
+    out_->value->add(*in_->value, weight_->getW()->getElement(0, 0));
+  }
+
+  void backward(const UpdateCallback& callback) {
+    if (weight_->getWGrad()) {
+      auto sum = Matrix::create(in_->value->getHeight(), 1, false, useGpu_);
+      sum->sumOfProducts(*in_->value, *out_->grad,
+                         /* scaleSum= */1, /* scaleDest= */0);
+      weight_->getWGrad()->sumCols(*sum,
+                                   /* scaleSum= */1, /* scaleDest= */1);
+      parameter_->incUpdate(callback);
+    }
+    if (in_->grad) {
+      in_->grad->add(*out_->grad, weight_->getW()->getElement(0, 0));
+    }
+  }
+
+protected:
+  std::unique_ptr<Weight> weight_;
+};
+
+REGISTER_PROJECTION(scaling, ScalingProjection);
+
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index f3cd2b4faf0c173cbb4997aac1a00ebba3027c92..a79dfe39c9bb26c7b2acec1051699e1804494d93 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -135,6 +135,17 @@ TEST(Projection, identity) {
   }
 }
 
+TEST(Projection, scaling) {
+  ProjectionConfig conf;
+  conf.set_type("scaling");
+  conf.set_input_size(10);
+  conf.set_output_size(10);
+  for (auto useGpu : {false}) {
+    testProjectionGrad(conf, INPUT_DATA, /* parameterSize */ 1,
+                       /* batchSize */ 100, useGpu);
+  }
+}
+
 #ifndef PADDLE_ONLY_CPU
 TEST(Projection, conv) {
   const int NUM_FILTERS = 16;
diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu
index d81b99e5441584b21fb023dcae65ccec7dd27996..54448bdb5a9bb4f665f28f973eada30a07fb5eee 100644
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -1451,6 +1451,8 @@ int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
   aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
             numCols, offset, false_type(), true_type() /*aAsColVector*/);
 
@@ -1463,18 +1465,69 @@ int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
   aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
             false_type(), true_type() /*aAsColVector*/);
 
   return 0;
 }
 
+template<>
+template <class Agg>
+int BaseMatrixT<real>::applyRow(
+    Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) {
+  if (scaleDest != 0) {
+    applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b);
+  } else {
+    applyRow(agg, base::binary::second(), b);
+    if (scaleAgg != 1) {
+      mulScalar(scaleAgg);
+    }
+  }
+  return 0;
+}
+
+template<>
+template <class Agg, class Op, class Saver>
+int BaseMatrixT<real>::applyRow(Agg agg, Op op, Saver sv,
+                                BaseMatrixT& b, BaseMatrixT& c) {
+  MatrixOffset offset(0, 0, 0, 0, 0, 0);
+  int numRows = b.height_;
+  int numCols = b.width_;
+  CHECK_EQ(height_, numRows);
+  CHECK_EQ(width_, 1UL);
+  CHECK_EQ(c.height_, numRows);
+  CHECK_EQ(c.width_, numCols);
+  aggregate(agg, op, sv,
+            b, c, numRows, numCols, offset,
+            false_type(), true_type() /*aAsColVector*/);
+  return 0;
+}
+
+template<>
+template <class Agg, class Op>
+int BaseMatrixT<real>::applyRow(Agg agg, Op op, real scaleDest, real scaleAgg,
+                                BaseMatrixT& b, BaseMatrixT& c) {
+  if (scaleDest != 0) {
+    applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c);
+  } else {
+    applyRow(agg, op, base::binary::second(), b, c);
+    if (scaleAgg != 1) {
+      mulScalar(scaleAgg);
+    }
+  }
+  return 0;
+}
+
 template<>
 template <class Agg>
 int BaseMatrixT<real>::applyCol(Agg agg, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(width_, numCols);
+  CHECK_EQ(height_, 1UL);
   aggregate(agg, base::unary::identity(), base::binary::second(), b, numRows,
             numCols, offset, true_type() /*aAsRowVector*/, false_type());
 
@@ -1487,6 +1540,8 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
   MatrixOffset offset(0, 0, 0, 0, 0, 0);
   int numRows = b.height_;
   int numCols = b.width_;
+  CHECK_EQ(width_, numCols);
+  CHECK_EQ(height_, 1UL);
   aggregate(agg, base::unary::identity(), sv, b, numRows, numCols, offset,
             true_type() /*aAsRowVector*/, false_type());
 
@@ -1494,8 +1549,23 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
 }
 
 template<>
-void BaseMatrixT<real>::sumRows(BaseMatrixT& b) {
-  applyRow(aggregate::sum(), b);
+template <class Agg>
+int BaseMatrixT<real>::applyCol(
+    Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) {
+  if (scaleDest != 0) {
+    applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b);
+  } else {
+    applyCol(agg, base::binary::second(), b);
+    if (scaleAgg != 1) {
+      mulScalar(scaleAgg);
+    }
+  }
+  return 0;
+}
+
+template<>
+void BaseMatrixT<real>::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), scaleDest, scaleSum, b);
 }
 
 template<>
@@ -1524,18 +1594,22 @@ void BaseMatrixT<real>::minCols(BaseMatrixT& b) {
 }
 
 template<>
-void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scale) {
-  applyCol(aggregate::sum(), base::binary::add2(1.0, scale), b);
+void BaseMatrixT<real>::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) {
+  applyCol(aggregate::sum(), scaleDest, scaleSum, b);
 }
 
 template<>
-void BaseMatrixT<real>::sumOfSquares(BaseMatrixT& b, BaseMatrixT& c) {
-  int numRows = b.height_;
-  int numCols = b.width_;
-  MatrixOffset offset(0, 0, 0, 0, 0, 0);
-  aggregate(aggregate::sum(), base::binary::squaredDiff(), base::binary::add(),
-            b, c, numRows, numCols, offset, false_type(),
-            true_type() /*aAsColVector*/);
+void BaseMatrixT<real>::sumOfSquaredDiffs(
+    BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), base::binary::squaredDiff(),
+           scaleDest, scaleSum, b, c);
+}
+
+template<>
+void BaseMatrixT<real>::sumOfProducts(
+    BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) {
+  applyRow(aggregate::sum(), base::binary::mul(),
+           scaleDest, scaleSum, b, c);
 }
 
 template class BaseMatrixT<real>;
diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h
index 2dd2c2c7a9b985924d53cb3bf8840eb1e55eee3e..3a91fdc3c30c5332866a97c256b018eb0982260f 100644
--- a/paddle/math/BaseMatrix.h
+++ b/paddle/math/BaseMatrix.h
@@ -305,6 +305,23 @@ public:
   template <class Agg>
   int applyRow(Agg agg, BaseMatrixT& b);
 
+  /**
+   * an aggregate expression that applies to each row of matrix b.
+   *
+   * @code
+   * for each row i & 0 <= j < b.width_, do:
+   *   dst = agg(op(b[i*ldb + j], c[i*ldc + j]))
+   *   this[i] = sv(this[i], dst)
+   * @endcode
+   */
+  template <class Agg, class Op, class Saver>
+  int applyRow(Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c);
+
+  // Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
+  template <class Agg, class Op>
+  int applyRow(Agg agg, Op op, real scaleDest, real scaleAgg,
+               BaseMatrixT& b, BaseMatrixT& c);
+
   /**
    * a aggregate expression that apply each row of matrix b.
    *
@@ -317,6 +334,10 @@ public:
   template <class Agg, class Saver>
   int applyRow(Agg agg, Saver sv, BaseMatrixT& b);
 
+  // Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
+  template <class Agg>
+  int applyRow(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b);
+
   /**
    * a aggregate expression that apply each column of matrix b.
    *
@@ -340,6 +361,10 @@ public:
   template <class Agg, class Saver>
   int applyCol(Agg agg, Saver sv, BaseMatrixT& b);
 
+  // Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
+  template <class Agg>
+  int applyCol(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b);
+
   bool useGpu() const { return useGpu_; }
 
   const T* rowBuf(size_t row) const { return data_ + width_ * row; }
@@ -920,7 +945,9 @@ public:
   void addRowScale(size_t cCol, BaseMatrixT& b, BaseMatrixT& c);
 
   /// calculate the sum of each row of the matrix b.
-  void sumRows(BaseMatrixT& b);
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij}
+  void sumRows(BaseMatrixT& b, T scaleSum, T scaleDest);
+
   /// calculate the maximum value of each row of the matrix b.
   void maxRows(BaseMatrixT& b);
   /// calculate the minimum value of each row of the matrix b.
@@ -932,10 +959,18 @@ public:
   void maxCols(BaseMatrixT& b);
   /// calculate the minimum value of each column of the matrix b.
   void minCols(BaseMatrixT& b);
-  void sumCols(BaseMatrixT& b, T scale);
-  /// calculate the sum of each row of (b - c)^2.
-  void sumOfSquares(BaseMatrixT& b, BaseMatrixT& c);
+
+  /// calculate the sum of each column of the matrix b.
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
+  void sumCols(BaseMatrixT& b, T scaleSum, T scaleDest);
+
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j (b_{ij} - c_{ij})^2
+  void sumOfSquaredDiffs(BaseMatrixT& b, BaseMatrixT& c,
+                         T scaleSum, T scaleDest);
+
+  /// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
+  void sumOfProducts(BaseMatrixT& b, BaseMatrixT& c,
+                     T scaleSum, T scaleDest);
 
   /**
    * @code
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 5ee8fbebfcfbe9696f7836b6b1c88e724551da8e..706a598d0c33762b0578190ea4a0aa06247a88ef 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -242,7 +242,7 @@ real GpuMatrix::getSum() {
 void GpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
-  sumCols(src, 1.0);
+  sumCols(src, 1.0, 1.0);
 }
 
 real GpuMatrix::getAbsSum() {
@@ -389,7 +389,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
   CHECK_EQ(width_, a.getWidth());
   GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
   if (!sMatPtr) {
-    sumCols(a, scale);
+    sumCols(a, /* scaleSum= */scale, /* scaleDest= */1);
   } else {
     real* data = getData();
     hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
@@ -589,7 +589,7 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
 void GpuMatrix::colMerge(Matrix& src) {
   CHECK(src.height_ == height_);
   if (!trans_ && !src.trans_) {
-    sumRows(src);
+    sumRows(src, /* scaleSum= */1, /* scaleDest= */0);
   } else {
     LOG(FATAL) << "Is not supported";
   }
@@ -599,7 +599,7 @@ void GpuMatrix::rowSum(Matrix& sum) {
   CHECK_EQ(sum.getHeight(), getHeight());
   CHECK_EQ(sum.getWidth(), (size_t)1);
 
-  sum.sumRows(*this);
+  sum.sumRows(*this, /* scaleSum= */1, /* scaleDest= */0);
 }
 
 void GpuMatrix::rowMax(Matrix& max) {
@@ -790,7 +790,8 @@ void GpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
     LOG(FATAL) << "not supported: GpuSparseMatrix as label";
   }
 
-  BaseMatrix::sumOfSquares(output, label);
+  BaseMatrix::sumOfSquaredDiffs(output, label,
+                                /* scaleSum= */1, /* scaleDest= */1);
 }
 
 void GpuMatrix::sumOfSquaresBp(Matrix& outputV, Matrix& label) {
@@ -1501,7 +1502,7 @@ void CpuMatrix::accumulateColSum(Matrix& src) {
   CHECK_EQ(getWidth(), src.getWidth());
   CHECK_EQ(getHeight(), (size_t)1);
-  sumCols(src, 1.0);
+  sumCols(src, /* scaleSum= */1, /* scaleDest= */1);
 }
 
 real CpuMatrix::getAbsSum() {
@@ -2188,7 +2189,7 @@ void CpuMatrix::collectBias(Matrix& a, real scale) {
   CHECK_EQ(width_, a.getWidth());
   CpuSparseMatrix* aptr = dynamic_cast<CpuSparseMatrix*>(&a);
   if (!aptr) {
-    sumCols(a, scale);
+    sumCols(a, /* scaleSum= */scale, /* scaleDest= */1);
   } else {
     size_t nnz = aptr->getElementCnt();
     int* cols = aptr->getCols();
@@ -2227,7 +2228,7 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
   real* dst = getData();
   real* src = a.getData();
   const int* starts = startsPos.getData();
-  MatrixPtr outMtx = Matrix::create(1, 1, false, false);
+  MatrixPtr outMtx = Matrix::create(nullptr, 1, width, false, false);
   MatrixPtr dataMtx = Matrix::create(nullptr, 1, width, false, false);
   for (size_t i = 0; i < height; i++) {
     int sequenceLength = starts[i + 1] - starts[i];
@@ -2239,13 +2240,15 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
     dataMtx->setData(src + starts[i] * width, sequenceLength, width);
     if (mode == 0) {
       // plain average
-      outMtx->sumCols(*dataMtx, (real)1 / (real)sequenceLength);
+      outMtx->sumCols(*dataMtx, (real)1 / (real)sequenceLength,
+                      /* scaleDest= */1);
     } else if (mode == 1) {
       // sum instead of average
-      outMtx->sumCols(*dataMtx, (real)1);
+      outMtx->sumCols(*dataMtx, /* scaleSum= */1, /* scaleDest= */1);
     } else if (mode == 2) {
       // divide by square root of sequenceLength
-      outMtx->sumCols(*dataMtx, (real)1 / std::sqrt(sequenceLength));
+      outMtx->sumCols(*dataMtx, (real)1 / std::sqrt(sequenceLength),
+                      /* scaleDest= */1);
     } else {
       LOG(FATAL) << "should not reach here";
     }
@@ -2932,7 +2935,7 @@ void CpuMatrix::rowSum(Matrix& sum) {
   CHECK_EQ(sum.getHeight(), getHeight());
   CHECK_EQ(sum.getWidth(), (size_t)1);
 
-  sum.sumRows(*this);
+  sum.sumRows(*this, /* scaleSum= */1, /* scaleDest= */0);
 }
 
 void CpuMatrix::rowMaxId(IVector& maxIds) {
@@ -3485,7 +3488,8 @@ void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
     }
   }
 
-  BaseMatrix::sumOfSquares(output, label);
+  BaseMatrix::sumOfSquaredDiffs(output, label,
+                                /* scaleSum= */1, /* scaleDest= */1);
 }
 
 /* calculate the error of outputV according to label */
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 881f0b821491be12e57be0fef04a38fc95fca4eb..3e55a9f9f565a7719fb20b3a5dda6b61e8961d5b 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -592,6 +592,20 @@ class DotMulProjection(Projection):
     def calc_parameter_dims(self, input_size, output_size):
         return [1, output_size]
 
+# ScalingProjection
+@config_class
+class ScalingProjection(Projection):
+    type = 'scaling'
+
+    def calc_output_size(self, input_layer_config):
+        return input_layer_config.size
+
+    def calc_parameter_size(self, input_size, output_size):
+        return 1
+
+    def calc_parameter_dims(self, input_size, output_size):
+        return [1, 1]
+
 @config_class
 class TableProjection(Projection):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 796121a64136ee3f31b2ed09b761c6a83cdbe625..b5e10ef81009a00e76b0c4147b404ba0aaba72b3 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -65,6 +65,7 @@ __all__ = [
     'StaticInput',
     'expand_layer',
     'scaling_layer',
+    'scaling_projection',
     'power_layer',
     'interpolation_layer',
     'bilinear_interp_layer',
@@ -458,7 +459,7 @@ def identity_projection(input, offset=None):
     :type input: LayerOutput
     :param offset: Offset, None if use default.
     :type offset: int
-    :return: A IdentityProjection or IdentityOffsetProjection Object
+    :return: A IdentityProjection or IdentityOffsetProjection object
     :rtype: IdentityProjection or IdentityOffsetProjection
     """
     if offset is None:
@@ -471,6 +472,34 @@ def identity_projection(input, offset=None):
     return proj
 
 
+@wrap_param_attr_default()
+def scaling_projection(input, param_attr=None):
+    """
+    scaling_projection multiplies the input by a scalar parameter and adds it
+    to the output.
+
+    .. math::
+       out += w * in
+
+    The example usage is:
+
+    .. code-block:: python
+
+       proj = scaling_projection(input=layer)
+
+    :param input: Input Layer.
+    :type input: LayerOutput
+    :param param_attr: Parameter config, None if use default.
+    :type param_attr: ParameterAttribute
+    :return: A ScalingProjection object
+    :rtype: ScalingProjection
+    """
+    proj = ScalingProjection(input_layer_name=input.name,
+                             **param_attr.attr)
+    proj.origin = input
+    return proj
+
+
 @wrap_param_attr_default()
 def dotmul_projection(input, param_attr=None):
     """
@@ -1426,11 +1455,11 @@ def bilinear_interp_layer(input,
     .. code-block:: python
 
        bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64)
-    
+
     :param input: A input layer.
     :type input: LayerOutput.
     :param out_size_x: bilinear interpolation output width.
-    :type out_size_x: int|None 
+    :type out_size_x: int|None
     :param out_size_y: bilinear interpolation output height.
     :type out_size_y: int|None
     :param name: The layer's name, which cna not be specified.
@@ -1742,11 +1771,11 @@ def img_conv_layer(input,
 
     The details of convolution layer, please refer UFLDL's `convolution `_ .
-    
-    Convolution Transpose (deconv) layer for image. Paddle only support square
+
+    Convolution Transpose (deconv) layer for image. Paddle only support square
     input currently and thus input image's width equals height.
-    The details of convolution transpose layer, 
+    The details of convolution transpose layer,
     please refer to the following explanation and references therein `_ .
 
@@ -4392,7 +4421,7 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
 
     .. code-block:: python
 
-       cost = cross_entropy(input=input_layer, 
+       cost = cross_entropy(input=input_layer,
                             label=label_layer)
 
     :param input: The first input layer.
@@ -4432,7 +4461,7 @@ def cross_entropy_with_selfnorm(input,
 
     .. code-block:: python
 
-       cost = cross_entropy_with_selfnorm(input=input_layer, 
+       cost = cross_entropy_with_selfnorm(input=input_layer,
                                           label=label_layer)
 
     :param input: The first input layer.
@@ -4502,7 +4531,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
 
     .. code-block:: python
 
-       cost = huber_cost(input=input_layer, 
+       cost = huber_cost(input=input_layer,
                          label=label_layer)
 
     :param input: The first input layer.
@@ -4542,7 +4571,7 @@ def multi_binary_label_cross_entropy(input,
 
     .. code-block:: python
 
-       cost = multi_binary_label_cross_entropy(input=input_layer, 
+       cost = multi_binary_label_cross_entropy(input=input_layer,
                                                label=label_layer)
 
     :param input: The first input layer.
diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py
index 19ac6ec9061d67fa73e10e95e15fde5322b00503..aa4521dcd5db3f845871cfaaedb02a86bcbddc38 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/projections.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/projections.py
@@ -26,6 +26,7 @@ with mixed_layer() as m5:
 
 with mixed_layer() as m6:
     m6 += dotmul_operator(a=m3, b=m4)
+    m6 += scaling_projection(m3)
 
 img = data_layer(name='img', size=32 * 32)
 flt = data_layer(name='filter', size=3 * 3 * 1 * 64)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
index e47e531a2223ddaa9dd1dfaf1fcee8a11008cbbd..2b3951c242411e0c0990a52bcb2ae6b1723a9367 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
@@ -111,13 +111,23 @@ layers {
   inputs {
     input_layer_name: "__mixed_2__"
   }
+  inputs {
+    input_layer_name: "__mixed_2__"
+    input_parameter_name: "___mixed_5__.w1"
+    proj_conf {
+      type: "scaling"
+      name: "___mixed_5__.w1"
+      input_size: 100
+      output_size: 100
+    }
+  }
   inputs {
     input_layer_name: "__mixed_3__"
   }
   operator_confs {
     type: "dot_mul"
     input_indices: 0
-    input_indices: 1
+    input_indices: 2
     input_sizes: 100
     input_sizes: 100
     output_size: 100
@@ -258,6 +268,16 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
+parameters {
+  name: "___mixed_5__.w1"
+  size: 1
+  initial_mean: 0.0
+  initial_std: 1.0
+  dims: 1
+  dims: 1
+  initial_strategy: 0
+  initial_smart: true
+}
 parameters {
   name: "___mixed_7__.w0"
   size: 30000
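A minimal usage sketch of the new scaling_projection inside a mixed_layer config. The 100-dimensional data layer and the accompanying full_matrix_projection are illustrative assumptions, not part of this change; the scaling_projection call itself follows the helper added above.

    from paddle.trainer_config_helpers import *

    # Hypothetical 100-dimensional input; any LayerOutput can be used the same way.
    din = data_layer(name='data', size=100)

    with mixed_layer(size=100) as out:
        # ordinary learned projection (illustrative)
        out += full_matrix_projection(input=din)
        # learns a single scalar w and adds w * din to the mixed layer's output
        out += scaling_projection(input=din)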