diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 2f85dd3c3b69d21cffede49b001298c6629900a6..3c2df52fed4f86675ce8f1ead6a3b66e4babde34 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -605,7 +605,7 @@ public: int batchSize = input->getHeight(); int size = 1; resizeOutput(batchSize, size); - output_.value->sumRows(*input); + output_.value->sumRows(*input, /* scaleSum= */1, /* scaleDest= */0); } virtual void backward(const UpdateCallback& callback = nullptr) { diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu index c3c425a23dc25e8ed7bb6705189510482639c12d..54448bdb5a9bb4f665f28f973eada30a07fb5eee 100644 --- a/paddle/math/BaseMatrix.cu +++ b/paddle/math/BaseMatrix.cu @@ -1473,6 +1473,21 @@ int BaseMatrixT::applyRow(Agg agg, Saver sv, BaseMatrixT& b) { return 0; } +template<> +template +int BaseMatrixT::applyRow( + Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) { + if (scaleDest != 0) { + applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b); + } else { + applyRow(agg, base::binary::second(), b); + if (scaleAgg != 1) { + mulScalar(scaleAgg); + } + } + return 0; +} + template<> template int BaseMatrixT::applyRow(Agg agg, Op op, Saver sv, @@ -1490,6 +1505,21 @@ int BaseMatrixT::applyRow(Agg agg, Op op, Saver sv, return 0; } +template<> +template +int BaseMatrixT::applyRow(Agg agg, Op op, real scaleDest, real scaleAgg, + BaseMatrixT& b, BaseMatrixT& c) { + if (scaleDest != 0) { + applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c); + } else { + applyRow(agg, op, base::binary::second(), b, c); + if (scaleAgg != 1) { + mulScalar(scaleAgg); + } + } + return 0; +} + template<> template int BaseMatrixT::applyCol(Agg agg, BaseMatrixT& b) { @@ -1518,9 +1548,24 @@ int BaseMatrixT::applyCol(Agg agg, Saver sv, BaseMatrixT& b) { return 0; } +template<> +template +int BaseMatrixT::applyCol( + Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b) { + if (scaleDest != 0) { + applyCol(agg, base::binary::add2(scaleDest, scaleAgg), b); + } else { + applyCol(agg, base::binary::second(), b); + if (scaleAgg != 1) { + mulScalar(scaleAgg); + } + } + return 0; +} + template<> void BaseMatrixT::sumRows(BaseMatrixT& b, real scaleSum, real scaleDest) { - applyRow(aggregate::sum(), base::binary::add2(scaleDest, scaleSum), b); + applyRow(aggregate::sum(), scaleDest, scaleSum, b); } template<> @@ -1550,21 +1595,21 @@ void BaseMatrixT::minCols(BaseMatrixT& b) { template<> void BaseMatrixT::sumCols(BaseMatrixT& b, real scaleSum, real scaleDest) { - applyCol(aggregate::sum(), base::binary::add2(scaleDest, scaleSum), b); + applyCol(aggregate::sum(), scaleDest, scaleSum, b); } template<> void BaseMatrixT::sumOfSquaredDiffs( BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) { applyRow(aggregate::sum(), base::binary::squaredDiff(), - base::binary::add2(scaleDest, scaleSum), b, c); + scaleDest, scaleSum, b, c); } template<> void BaseMatrixT::sumOfProducts( BaseMatrixT& b, BaseMatrixT& c, real scaleSum, real scaleDest) { applyRow(aggregate::sum(), base::binary::mul(), - base::binary::add2(scaleDest, scaleSum), b, c); + scaleDest, scaleSum, b, c); } template class BaseMatrixT; diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h index fd1604b985dd5cdce8f69a5e4f9a07b5760f9a0d..3a91fdc3c30c5332866a97c256b018eb0982260f 100644 --- a/paddle/math/BaseMatrix.h +++ b/paddle/math/BaseMatrix.h @@ -317,6 +317,11 @@ public: template int applyRow(Agg agg, Op op, Saver sv, BaseMatrixT& b, BaseMatrixT& c); + // Same as the above with the special handing of sv=add2(scaleDest, scaleAgg) + template + int applyRow(Agg agg, Op op, real scaleDest, real scaleAgg, + BaseMatrixT& b, BaseMatrixT& c); + /** * a aggregate expression that apply each row of matrix b. * @@ -329,6 +334,10 @@ public: template int applyRow(Agg agg, Saver sv, BaseMatrixT& b); + // Same as the above with the special handing of sv=add2(scaleDest, scaleAgg) + template + int applyRow(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b); + /** * a aggregate expression that apply each column of matrix b. * @@ -352,6 +361,10 @@ public: template int applyCol(Agg agg, Saver sv, BaseMatrixT& b); + // Same as the above with the special handing of sv=add2(scaleDest, scaleAgg) + template + int applyCol(Agg agg, real scaleDest, real scaleAgg, BaseMatrixT& b); + bool useGpu() const { return useGpu_; } const T* rowBuf(size_t row) const { return data_ + width_ * row; } diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ca5ab68c5c2b4bbf61110978f824b631d6d78331..b5e10ef81009a00e76b0c4147b404ba0aaba72b3 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -29,7 +29,6 @@ except ImportError: import pickle import copy -<<<<<<< 0ba0f02c685e52b14632f6b9bfca4321494505c7 __all__ = [ "full_matrix_projection", "AggregateLevel", @@ -1456,11 +1455,11 @@ def bilinear_interp_layer(input, .. code-block:: python bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64) - + :param input: A input layer. :type input: LayerOutput. :param out_size_x: bilinear interpolation output width. - :type out_size_x: int|None + :type out_size_x: int|None :param out_size_y: bilinear interpolation output height. :type out_size_y: int|None :param name: The layer's name, which cna not be specified. @@ -1772,11 +1771,11 @@ def img_conv_layer(input, The details of convolution layer, please refer UFLDL's `convolution `_ . - - Convolution Transpose (deconv) layer for image. Paddle only support square + + Convolution Transpose (deconv) layer for image. Paddle only support square input currently and thus input image's width equals height. - The details of convolution transpose layer, + The details of convolution transpose layer, please refer to the following explanation and references therein `_ . @@ -4422,7 +4421,7 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): .. code-block:: python - cost = cross_entropy(input=input_layer, + cost = cross_entropy(input=input_layer, label=label_layer) :param input: The first input layer. @@ -4462,7 +4461,7 @@ def cross_entropy_with_selfnorm(input, .. code-block:: python - cost = cross_entropy_with_selfnorm(input=input_layer, + cost = cross_entropy_with_selfnorm(input=input_layer, label=label_layer) :param input: The first input layer. @@ -4532,7 +4531,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): .. code-block:: python - cost = huber_cost(input=input_layer, + cost = huber_cost(input=input_layer, label=label_layer) :param input: The first input layer. @@ -4572,7 +4571,7 @@ def multi_binary_label_cross_entropy(input, .. code-block:: python - cost = multi_binary_label_cross_entropy(input=input_layer, + cost = multi_binary_label_cross_entropy(input=input_layer, label=label_layer) :param input: The first input layer.