diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt
index 1518a8a654cfb54376a49760dc5873733c916937..138f7dcf1680d42ee5b328996b2abb8b32a93a75 100644
--- a/paddle/function/CMakeLists.txt
+++ b/paddle/function/CMakeLists.txt
@@ -37,6 +37,7 @@ if(WITH_GPU)
     add_simple_unittest(MulOpTest)
     add_simple_unittest(CosSimOpTest)
     add_simple_unittest(RowConvOpTest)
+    add_simple_unittest(SwitchOpTest)
 endif()
 
 add_simple_unittest(ConvOpTest)
diff --git a/paddle/function/SwitchOp.cpp b/paddle/function/SwitchOp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4667c4e01d52aec453b108712144d08ac1e7c3c0
--- /dev/null
+++ b/paddle/function/SwitchOp.cpp
@@ -0,0 +1,132 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "SwitchOp.h"
+#include "paddle/math/Vector.h"
+
+namespace paddle {
+
+template <>
+void NCHW2NHWC<DEVICE_TYPE_CPU>(real* outputs,
+                                const real* inputs,
+                                const int num,
+                                const int inC,
+                                const int inH,
+                                const int inW) {
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < inC; ++c) {
+      for (int h = 0; h < inH; ++h) {
+        for (int w = 0; w < inW; ++w) {
+          outputs[((n * inH + h) * inW + w) * inC + c] = *(inputs++);
+        }
+      }
+    }
+  }
+}
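+
+// Index check for NCHW2NHWC: with num=1, inC=2, inH=2, inW=2, the NCHW
+// element (n=0, c=1, h=0, w=0) is read at linear input index 4 and written
+// to NHWC offset ((0*2+0)*2+0)*2+1 = 1.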
+
+template <>
+void NHWC2NCHW<DEVICE_TYPE_CPU>(real* outputs,
+                                const real* inputs,
+                                const int num,
+                                const int inH,
+                                const int inW,
+                                const int inC) {
+  for (int n = 0; n < num; ++n) {
+    for (int h = 0; h < inH; ++h) {
+      for (int w = 0; w < inW; ++w) {
+        for (int c = 0; c < inC; ++c) {
+          outputs[((n * inC + c) * inH + h) * inW + w] = *(inputs++);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * \brief Switch the dimension order of a 4D tensor from NCHW
+ *        (batch_size, channels, height, width) to NHWC
+ *        (batch_size, height, width, channels).
+ *
+ * Argument in this Function:
+ * \param inputs  A 4D tensor in NCHW order, only one input.
+ * \param outputs A 4D tensor holding the same values in NHWC order.
+ */
+template <DeviceType Device>
+class NCHW2NHWCFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {}
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
+
+    size_t num = inputs[0].shape()[0];
+    size_t inC = inputs[0].shape()[1];
+    size_t inH = inputs[0].shape()[2];
+    size_t inW = inputs[0].shape()[3];
+    typename Tensor<real, Device>::Vector vec(outputs[0].shape().getElements(),
+                                              outputs[0].data());
+    vec.zero();
+
+    NCHW2NHWC<Device>(
+        outputs[0].data(), inputs[0].data(), num, inC, inH, inW);
+  }
+};
+
+/**
+ * \brief Switch the dimension order of a 4D tensor from NHWC
+ *        (batch_size, height, width, channels) back to NCHW
+ *        (batch_size, channels, height, width).
+ *
+ * Argument in this Function:
+ * \param inputs  A 4D tensor in NHWC order, only one input.
+ * \param outputs A 4D tensor holding the same values in NCHW order.
+ */
+template <DeviceType Device>
+class NHWC2NCHWFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {}
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
+
+    size_t num = inputs[0].shape()[0];
+    size_t inH = inputs[0].shape()[1];
+    size_t inW = inputs[0].shape()[2];
+    size_t inC = inputs[0].shape()[3];
+
+    NHWC2NCHW<Device>(
+        outputs[0].data(), inputs[0].data(), num, inH, inW, inC);
+  }
+};
+
+REGISTER_TYPED_FUNC(NCHW2NHWC, CPU, NCHW2NHWCFunc);
+REGISTER_TYPED_FUNC(NHWC2NCHW, CPU, NHWC2NCHWFunc);
+#ifndef PADDLE_ONLY_CPU
+REGISTER_TYPED_FUNC(NCHW2NHWC, GPU, NCHW2NHWCFunc);
+REGISTER_TYPED_FUNC(NHWC2NCHW, GPU, NHWC2NCHWFunc);
+#endif
+
+}  // namespace paddle
diff --git a/paddle/function/SwitchOp.h b/paddle/function/SwitchOp.h
new file mode 100644
index 0000000000000000000000000000000000000000..5a2418a703e5136ffaeb0c722ef3731a31765b0f
--- /dev/null
+++ b/paddle/function/SwitchOp.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+
+namespace paddle {
+
+/**
+ * \brief This function switches the dimension order of an image input.
+ *        The input and output are both 4D tensors. It switches the order
+ *        (batch_size, channels, height, width) to the order
+ *        (batch_size, height, width, channels).
+ *
+ * \param[out] outputs save results.
+ * \param[in]  inputs  input data.
+ * \param[in]  num     batch size of input data.
+ * \param[in]  inC     channel number of input data.
+ * \param[in]  inH     height of input data.
+ * \param[in]  inW     width of input data.
+ */
+template <DeviceType Device>
+void NCHW2NHWC(real* outputs,
+               const real* inputs,
+               const int num,
+               const int inC,
+               const int inH,
+               const int inW);
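+
+// Usage sketch (hypothetical buffers; NHWC2NCHW below is the exact inverse,
+// so this round trip restores the original layout):
+//   std::vector<real> nchw(num * inC * inH * inW), nhwc(nchw.size());
+//   NCHW2NHWC<DEVICE_TYPE_CPU>(nhwc.data(), nchw.data(), num, inC, inH, inW);
+//   NHWC2NCHW<DEVICE_TYPE_CPU>(nchw.data(), nhwc.data(), num, inH, inW, inC);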
+
+/**
+ * \brief This function switches the dimension order of an image input.
+ *        The input and output are both 4D tensors. It switches the order
+ *        (batch_size, height, width, channels) to the order
+ *        (batch_size, channels, height, width).
+ *
+ * \param[out] inGrad  gradients of previous layer.
+ * \param[in]  outGrad output gradients.
+ * \param[in]  num     batch size of input data.
+ * \param[in]  inH     height of input data.
+ * \param[in]  inW     width of input data.
+ * \param[in]  inC     channel number of input data.
+ */
+template <DeviceType Device>
+void NHWC2NCHW(real* inGrad,
+               const real* outGrad,
+               const int num,
+               const int inH,
+               const int inW,
+               const int inC);
+}  // namespace paddle
diff --git a/paddle/function/SwitchOpGpu.cu b/paddle/function/SwitchOpGpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c2020cb2ab1cd557939c222526d58010eaaa4a0d
--- /dev/null
+++ b/paddle/function/SwitchOpGpu.cu
@@ -0,0 +1,80 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "hl_base.h"
+#include "SwitchOp.h"
+
+namespace paddle {
+
+__global__ void KeNCHW2NHWC(real* outputs, const real* inputs,
+                            int inC, int inH, int inW,
+                            int nthreads) {
+  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  if (idx < nthreads) {
+    const int w = idx % inW;
+    const int h = (idx / inW) % inH;
+    const int c = (idx / inW / inH) % inC;
+    const int n = idx / inW / inH / inC;
+
+    const int off = ((n * inH + h) * inW + w) * inC + c;
+    outputs[off] = inputs[idx];
+  }
+}
+
+template <>
+void NCHW2NHWC<DEVICE_TYPE_GPU>(real* outputs,
+                                const real* inputs,
+                                const int num,
+                                const int inC,
+                                const int inH,
+                                const int inW) {
+  int nth = num * inC * inH * inW;
+  int blockSize = 1024;
+  int gridSize = (nth + blockSize - 1) / blockSize;
+  KeNCHW2NHWC<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
+      outputs, inputs, inC, inH, inW, nth);
+  CHECK_SYNC("NCHW2NHWC");
+}
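+
+// Launch-shape note: one thread copies one element; gridSize rounds
+// nth / blockSize up so the tail elements are still covered, and the
+// idx < nthreads guard in the kernel keeps the extra threads from
+// writing out of bounds.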
+
+__global__ void KeNHWC2NCHW(real* outputs, const real* inputs,
+                            int inH, int inW, int inC,
+                            int nthreads) {
+  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  if (idx < nthreads) {
+    const int c = idx % inC;
+    const int w = (idx / inC) % inW;
+    const int h = (idx / inC / inW) % inH;
+    const int n = idx / inW / inH / inC;
+
+    const int off = ((n * inC + c) * inH + h) * inW + w;
+    outputs[off] = inputs[idx];
+  }
+}
+
+template <>
+void NHWC2NCHW<DEVICE_TYPE_GPU>(real* outputs,
+                                const real* inputs,
+                                const int num,
+                                const int inH,
+                                const int inW,
+                                const int inC) {
+  int nth = num * inC * inH * inW;
+  int blockSize = 1024;
+  int gridSize = (nth + blockSize - 1) / blockSize;
+  KeNHWC2NCHW<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
+      outputs, inputs, inH, inW, inC, nth);
+  CHECK_SYNC("NHWC2NCHW");
+}
+
+}  // namespace paddle
diff --git a/paddle/function/SwitchOpTest.cpp b/paddle/function/SwitchOpTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..03b0dd66ddcbab713969ed747601ecb1b2eb7955
--- /dev/null
+++ b/paddle/function/SwitchOpTest.cpp
@@ -0,0 +1,44 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "FunctionTest.h"
+
+namespace paddle {
+
+TEST(Switch, real) {
+  for (size_t numSamples : {1, 4, 8, 16}) {
+    for (size_t channels : {1, 4, 8, 16}) {
+      for (size_t imgSizeH : {1, 4, 8, 16}) {
+        for (size_t imgSizeW : {1, 4, 8, 16}) {
+          VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
+                  << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
+          for (bool test_grad : {true, false}) {
+            CpuGpuFuncCompare compare(test_grad ? "NHWC2NCHW" : "NCHW2NHWC",
+                                      FuncConfig());
+            TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
+            TensorShape outDims{numSamples, imgSizeH, imgSizeW, channels};
+            compare.addInputs(
+                BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims));
+            compare.addOutputs(BufferArg(
+                VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO));
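+            // CpuGpuFuncCompare feeds identical random inputs to the
+            // registered CPU and GPU implementations and checks their
+            // outputs element-wise.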
+            compare.run();
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/PixelSoftmaxLayer.cpp b/paddle/gserver/layers/PixelSoftmaxLayer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6da84a6303102c60346d10a37642e294852d78cd
--- /dev/null
+++ b/paddle/gserver/layers/PixelSoftmaxLayer.cpp
@@ -0,0 +1,89 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "PixelSoftmaxLayer.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+REGISTER_LAYER(pixel_softmax, PixelSoftmaxLayer);
+
+bool PixelSoftmaxLayer::init(const LayerMap& layerMap,
+                             const ParameterMap& parameterMap) {
+  /* Initialize the basic parent class */
+  Layer::init(layerMap, parameterMap);
+  auto& img_conf = config_.inputs(0).image_conf();
+  inH_ =
+      img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size();
+  inW_ = img_conf.img_size();
+  inC_ = img_conf.channels();
+  createFunction(forward_, "NCHW2NHWC", FuncConfig());
+  createFunction(backward_, "NHWC2NCHW", FuncConfig());
+  // inDims_ is the NCHW shape of the layer's input/output; outDims_ is the
+  // NHWC shape of the temporary buffers.
+  inDims_ = TensorShape({0, inC_, inH_, inW_});
+  outDims_ = TensorShape({0, inH_, inW_, inC_});
+  return true;
+}
+
+void PixelSoftmaxLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  MatrixPtr input = inputLayers_[0]->getOutputValue();
+  size_t batchSize = input->getHeight();
+  Matrix::resizeOrCreate(
+      tmpInput_, batchSize, inH_ * inW_ * inC_, false, useGpu_);
+  Matrix::resizeOrCreate(
+      tmpOutput_, batchSize, inH_ * inW_ * inC_, false, useGpu_);
+  tmpOutput_->zeroMem();
+  resetOutput(batchSize, inH_ * inW_ * inC_);
+  inDims_.setDim(0, batchSize);
+  outDims_.setDim(0, batchSize);
+
+  // switch NCHW to NHWC
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*getInputValue(0), inDims_);
+  outputs.addArg(*tmpInput_, outDims_);
+  forward_[0]->calc(inputs, outputs);
+  // softmax forward, saving the softmax result into tmpOutput_
+  tmpInput_->softmax(*tmpOutput_);
+
+  // switch NHWC back to NCHW
+  BufferArgs inputs_1;
+  BufferArgs outputs_1;
+  inputs_1.addArg(*tmpOutput_, outDims_);
+  outputs_1.addArg(*getOutputValue(), inDims_);
+  backward_[0]->calc(inputs_1, outputs_1);
+}
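+
+// Backward-pass note: given the forward output values y and the output
+// gradient g (both in NHWC order here), softmaxBackward computes in place
+//   g := y .* (g - rowSum(g .* y)),
+// i.e. the softmax Jacobian applied to the incoming gradient.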
+void PixelSoftmaxLayer::backward(const UpdateCallback& callback) {
+  (void)callback;
+  REGISTER_TIMER_INFO("PixelSoftmaxBackward", getName().c_str());
+
+  // switch NCHW to NHWC
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*getOutputGrad(), inDims_);
+  outputs.addArg(*tmpInput_, outDims_);
+  forward_[0]->calc(inputs, outputs);
+  // softmax backward, saving the gradient into tmpInput_ in place
+  tmpInput_->softmaxBackward(*tmpOutput_);
+
+  // switch NHWC back to NCHW
+  BufferArgs inputs_1;
+  BufferArgs outputs_1;
+  inputs_1.addArg(*tmpInput_, outDims_);
+  outputs_1.addArg(*getInputGrad(0), inDims_);
+  backward_[0]->calc(inputs_1, outputs_1);
+}
+}  // namespace paddle
diff --git a/paddle/gserver/layers/PixelSoftmaxLayer.h b/paddle/gserver/layers/PixelSoftmaxLayer.h
new file mode 100644
index 0000000000000000000000000000000000000000..80a4ddad5a6922617290b36c908eea951ae9f9d4
--- /dev/null
+++ b/paddle/gserver/layers/PixelSoftmaxLayer.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Layer.h"
+
+namespace paddle {
+
+/**
+ * \brief This layer calculates the softmax over the image channel dimension.
+ */
+class PixelSoftmaxLayer : public Layer {
+public:
+  explicit PixelSoftmaxLayer(const LayerConfig& config) : Layer(config) {}
+
+  ~PixelSoftmaxLayer() {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+
+protected:
+  uint32_t inC_;
+  uint32_t inH_;
+  uint32_t inW_;
+  TensorShape inDims_;
+  TensorShape outDims_;
+  MatrixPtr tmpInput_;
+  MatrixPtr tmpOutput_;
+};
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 59d1e9273d42d6a53ec284c6ed684096b3f42321..8a9904087e192a1697614b4b9ea6c494977d1fb8 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1792,6 +1792,25 @@ TEST(Layer, RowConvLayer) {
   }
 }
 
+TEST(Layer, PixelSoftmaxLayer) {
+  TestConfig config;
+  // config input_0
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0});
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  ImageConfig* img = input->mutable_image_conf();
+  img->set_channels(4);
+  img->set_img_size(16);
+  img->set_img_size_y(16);
+
+  // config softmax layer
+  config.layerConfig.set_type("pixel_softmax");
+  config.layerConfig.set_name("pixelSoftmaxLayer");
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "pixel_softmax", 100, false, useGpu, true, 2);
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 4431d613f655c1d0c8da13bb5ac9225980c650ad..2c18df3732f3afbe889dfb0e9b8bc978707b7a62 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3385,6 +3385,27 @@ void CpuMatrix::oneHotCrossEntropyWithSelfNormBp(Matrix& output,
   real* out = output.getData();                                       \
   for (size_t i = 0; i < numSamples; ++i, grad += dim, out += dim)
 
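+// Derivative of softmax given its forward output outputV: on entry this
+// matrix holds the output gradient, on exit the input gradient
+//   this[i][j] = outputV[i][j] * (this[i][j] - sum_k this[i][k] * outputV[i][k]).
+// sftmaxDot_ and sftmaxSum_ are scratch buffers for the row sums.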
+void CpuMatrix::softmaxBackward(Matrix& outputV) {
+  CHECK(!outputV.useGpu()) << "Matrix types are not equal";
+  size_t height = getHeight();
+  size_t width = getWidth();
+  CHECK(height == outputV.getHeight() && width == outputV.getWidth())
+      << "Matrix dimensions are not equal";
+  Matrix::resizeOrCreate(sftmaxDot_,
+                         height_,
+                         width_,
+                         /* trans */ false,
+                         useGpu_);
+  Matrix::resizeOrCreate(sftmaxSum_,
+                         height_,
+                         1,
+                         /* trans */ false,
+                         useGpu_);
+  sftmaxDot_->dotMul(*this, outputV);
+  sftmaxSum_->colMerge(*sftmaxDot_);
+  softmaxDerivative(outputV, *sftmaxSum_);
+}
+
 void CpuMatrix::softmax(Matrix& output) {
   CHECK(!output.useGpu());
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 7dfd593225065e18830b2b0c0ce854fe7a2d5178..dcb63a2d3fcd4255d527d11a677900c568ef6be8 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -1456,6 +1456,10 @@ public:
 };
 
 class CpuMatrix : public Matrix {
+private:
+  MatrixPtr sftmaxSum_;
+  MatrixPtr sftmaxDot_;
+
 public:
   CpuMatrix(size_t height, size_t width, bool trans = false);
   CpuMatrix(real* data, size_t height, size_t width, bool trans = false)
@@ -1728,6 +1732,7 @@ public:
                          Matrix& prevGrad2);
 
   void softmax(Matrix& output);
+  void softmaxBackward(Matrix& outputV);
   void sequenceSoftmax(Matrix& output, const IVector& index);
 
   void softmaxDerivative(Matrix& output, Matrix& sftmaxSum);
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 370529ed97b1f1427ebc088a3031437a7f65e0cf..dc9c503e0b51b1ab379789806bc60b57d06659b0 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3171,6 +3171,22 @@ class RecurrentLayerGroup(LayerBase):
             name, 'recurrent_layer_group', 0, inputs=[], device=device)
 
 
+@config_layer('pixel_softmax')
+class PixelSoftmaxLayer(LayerBase):
+    def __init__(self, name, inputs, **xargs):
+        super(PixelSoftmaxLayer, self).__init__(
+            name, 'pixel_softmax', 0, inputs=inputs, **xargs)
+
+        input_layer = self.get_input_layer(0)
+        image_conf = self.config.inputs[0].image_conf
+        image_conf.img_size = input_layer.width
+        image_conf.img_size_y = input_layer.height
+        image_conf.channels = input_layer.size / (input_layer.width *
+                                                  input_layer.height)
+        self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
+                           image_conf.channels)
+
+
 # Deprecated, use a new layer specific class instead
 @config_func
 def Layer(name, type, **xargs):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 206de1f8e1c7d3f9f977b4ca97522065c9ed0cab..fdac5984b08f90bf35f486d39439bea05d78af18 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -217,6 +217,7 @@ class LayerType(object):
     SMOOTH_L1 = 'smooth_l1'
 
     PRELU = 'prelu'
+    PIXEL_SOFTMAX_LAYER = 'pixel_softmax'
 
     @staticmethod
     def is_layer_type(type_name):
@@ -5853,3 +5854,40 @@ def prelu_layer(input,
         layer_type=LayerType.PRELU,
         parents=input,
         size=l.config.size)
+
+
+@layer_support()
+@wrap_name_default('pixel_softmax')
+def pixel_softmax_layer(input, name=None, layer_attr=None):
+    """
+    This layer calculates the softmax over the image channel dimension.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       pixel_softmax = pixel_softmax_layer(input=layer, name='softmax')
+
+    :param name: Name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    if isinstance(input, (LayerOutput, Projection)):
+        input = [input]
+    else:
+        assert isinstance(input, collections.Sequence)
+    l = Layer(
+        inputs=[x.name for x in input],
+        name=name,
+        type=LayerType.PIXEL_SOFTMAX_LAYER,
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.PIXEL_SOFTMAX_LAYER,
+        parents=input,
+        size=l.config.size)
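
A minimal configuration sketch for the new layer (the surrounding layer names
and sizes are illustrative only, not part of this patch):

.. code-block:: python

    from paddle.trainer_config_helpers import *

    data = data_layer(name='image', size=4 * 16 * 16, height=16, width=16)
    prob = pixel_softmax_layer(input=data, name='pixel_prob')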