diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp deleted file mode 100644 index 2b7bef0a757d7c706be3815c539b036b094596cf..0000000000000000000000000000000000000000 --- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "ExpandConvBaseLayer.h" - -#include "paddle/utils/Logging.h" -namespace paddle { - -bool ExpandConvBaseLayer::init(const LayerMap &layerMap, - const ParameterMap ¶meterMap) { - /* Initialize the basic convolutional parent class */ - ConvBaseLayer::init(layerMap, parameterMap); - - int index = 0; - for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - /* Consistent caffe mode for multiple input */ - caffeMode_ = conf.caffe_mode(); - - // create a new weight - size_t height, width; - height = filterPixels_[index] * filterChannels_[index]; - width = (!isDeconv_) ? numFilters_ : channels_[index]; - CHECK_EQ(parameters_[index]->getSize(), width * height); - Weight *w = new Weight(height, width, parameters_[index]); - weights_.emplace_back(w); - index++; - } - if (biasParameter_.get()) { - if (sharedBiases_) { - CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); - biases_ = - std::unique_ptr(new Weight(numFilters_, 1, biasParameter_)); - } else { - biases_ = - std::unique_ptr(new Weight(getSize(), 1, biasParameter_)); - } - } - getOutputSize(); - - return true; -} - -size_t ExpandConvBaseLayer::getOutputSize() { - CHECK_NE(inputLayers_.size(), 0UL); - size_t layerSize = ConvBaseLayer::calOutputSize(); - return layerSize; -} - -void ExpandConvBaseLayer::addSharedBias() { - size_t mapW = getOutputSize() / numFilters_; - size_t mapH = getOutputValue()->getElementCnt() / mapW; - MatrixPtr out = - Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - out->transpose(transOutValue_, false); // false means no memory allocation - transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, - numFilters_); - - MatrixPtr bias = Matrix::create(biases_->getW()->getData(), - 1, - biases_->getW()->getElementCnt(), - false, - useGpu_); - transOutValue_->addBias(*bias, 1.0f); - - transOutValue_->reshape(mapW, mapH); - transOutValue_->transpose(out, false); // false means no memory allocation - - out->clear(); - bias->clear(); -} - -void ExpandConvBaseLayer::addUnsharedBias() { - MatrixPtr outValue = getOutputValue(); - MatrixPtr bias = Matrix::create(biases_->getW()->getData(), - 1, - biases_->getW()->getElementCnt(), - false, - useGpu_); - outValue->addBias(*bias, 1.0f); -} - -void ExpandConvBaseLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { - size_t mapW = getOutputSize() / numFilters_; - size_t mapH = v->getElementCnt() / mapW; - MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - vTmp->transpose(transOutValue_, false); // false means no memory allocation - transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, - numFilters_); - biases->collectBias(*transOutValue_, 1.0f); -} - -void ExpandConvBaseLayer::bpropBiases(MatrixPtr v) { - MatrixPtr biases = Matrix::create(biases_->getWGrad()->getData(), - 1, - biases_->getWGrad()->getElementCnt(), - false, - useGpu_); - if (sharedBiases_) { - bpropSharedBias(biases, v); - } else { - biases->collectBias(*v, 1.0f); - } - biases->clear(); -} - -} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.h b/paddle/gserver/layers/ExpandConvBaseLayer.h deleted file mode 100644 index 01c699d2344443a1887ec0b5005125f617cbe279..0000000000000000000000000000000000000000 --- a/paddle/gserver/layers/ExpandConvBaseLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "ConvBaseLayer.h" -#include "paddle/math/Matrix.h" - -namespace paddle { - -/** - * @brief A subclass of ConvBaseLayer that is a superclass of both - * ExpandConvLayer and ExpandConvTransLayer - */ -class ExpandConvBaseLayer : public ConvBaseLayer { -protected: - /// The transpose of output, which is an auxiliary matrix. - MatrixPtr transOutValue_; - -public: - explicit ExpandConvBaseLayer(const LayerConfig& config) - : ConvBaseLayer(config) {} - - ~ExpandConvBaseLayer() {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - size_t getOutputSize(); - - /** - * Add shared bias. - */ - void addSharedBias(); - - /** - * Add unshared bias. - */ - void addUnsharedBias(); - - void bpropSharedBias(MatrixPtr biases, MatrixPtr v); - void bpropBiases(MatrixPtr v); -}; - -} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 20de475fc3f6b6f3c05ac26bea8363daff0cf110..48dfcb49a4c2c46891bb5236fc1f8e644c03f327 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -36,7 +36,36 @@ inline bool isDepthwiseConv(int channels, int groups) { bool ExpandConvLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { /* Initialize the basic convolutional parent class */ - ExpandConvBaseLayer::init(layerMap, parameterMap); + ConvBaseLayer::init(layerMap, parameterMap); + + int index = 0; + for (auto &inputConfig : config_.inputs()) { + const ConvConfig &conf = inputConfig.conv_conf(); + /* Consistent caffe mode for multiple input */ + caffeMode_ = conf.caffe_mode(); + + // create a new weight + size_t height, width; + height = filterPixels_[index] * filterChannels_[index]; + width = (!isDeconv_) ? numFilters_ : channels_[index]; + CHECK_EQ(parameters_[index]->getSize(), width * height); + Weight *w = new Weight(height, width, parameters_[index]); + weights_.emplace_back(w); + index++; + } + + if (biasParameter_.get()) { + if (sharedBiases_) { + CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); + biases_ = std::unique_ptr( + new Weight(1, numFilters_, biasParameter_, 0)); + } else { + biases_ = + std::unique_ptr(new Weight(1, getSize(), biasParameter_, 0)); + } + } + + getOutputSize(); size_t numInputs = config_.inputs_size(); inputShape_.resize(numInputs); @@ -108,6 +137,12 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, return true; } +size_t ExpandConvLayer::getOutputSize() { + CHECK_NE(inputLayers_.size(), 0UL); + size_t layerSize = ConvBaseLayer::calOutputSize(); + return layerSize; +} + // i is the index of input layers #define BACKWARD_INPUT(i, inputs, outputs) \ backward_[2 * i]->calc(inputs, outputs) @@ -155,11 +190,7 @@ void ExpandConvLayer::forward(PassType passType) { /* add the bias-vector */ if (biases_.get()) { - if (sharedBiases_) { - addSharedBias(); - } else { - addUnsharedBias(); - } + output_.value->addBias(*biases_->getW(), 1.0, sharedBiases_); } /* activation */ @@ -171,7 +202,7 @@ void ExpandConvLayer::backward(const UpdateCallback &callback) { MatrixPtr outGrad = getOutputGrad(); if (biases_ && biases_->getWGrad()) { - bpropBiases(outGrad); + biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBiases_); /* Increasing the number of gradient */ biases_->getParameterPtr()->incUpdate(callback); } diff --git a/paddle/gserver/layers/ExpandConvLayer.h b/paddle/gserver/layers/ExpandConvLayer.h index a1f943d1521547af0f82cec7da8a4efe9037cd71..a0873de19253f2496bc0c2fba550b3199dfc7486 100644 --- a/paddle/gserver/layers/ExpandConvLayer.h +++ b/paddle/gserver/layers/ExpandConvLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include -#include "ExpandConvBaseLayer.h" +#include "ConvBaseLayer.h" #include "paddle/math/Matrix.h" namespace paddle { @@ -28,10 +28,9 @@ namespace paddle { * The config file api is img_conv_layer. */ -class ExpandConvLayer : public ExpandConvBaseLayer { +class ExpandConvLayer : public ConvBaseLayer { public: - explicit ExpandConvLayer(const LayerConfig& config) - : ExpandConvBaseLayer(config) {} + explicit ExpandConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} ~ExpandConvLayer() {} @@ -41,6 +40,8 @@ public: void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; + size_t getOutputSize(); + protected: std::vector inputShape_; std::vector filterShape_; diff --git a/paddle/operators/sequence_avg_pool_op.cc b/paddle/operators/sequence_avg_pool_op.cc index eb3e37655bc7eae1a3cf1348434e33a415947cad..9815b8f3a8d813959949bbfedc79f404721a8216 100644 --- a/paddle/operators/sequence_avg_pool_op.cc +++ b/paddle/operators/sequence_avg_pool_op.cc @@ -63,7 +63,9 @@ class SequenceAvgPoolGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), - "Gradient of Out should not be null"); + "Gradient of Out should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "The input X should not be null."); auto og_dims = ctx.Input(framework::GradVarName("Out"))->dims(); auto x_dims = ctx.Input("X")->dims(); diff --git a/paddle/operators/sequence_avg_pool_op.h b/paddle/operators/sequence_avg_pool_op.h index 6e343b87e2938399409498407ac46b2416dc2231..ebe0956344eb71d0fb2836f1b4a989ac546d9f78 100644 --- a/paddle/operators/sequence_avg_pool_op.h +++ b/paddle/operators/sequence_avg_pool_op.h @@ -21,6 +21,9 @@ namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; +template +using EigenVector = framework::EigenVector; template using EigenMatrix = framework::EigenMatrix; @@ -43,8 +46,8 @@ class SequenceAvgPoolKernel : public framework::OpKernel { static_cast(lod[0][i + 1])); Tensor out_t = out->Slice(i, i + 1); int64_t h = static_cast(lod[0][i + 1] - lod[0][i]); - auto in_e = EigenMatrix::From(in_t, {h, w}); - auto out_e = EigenMatrix::From(out_t, {h, w}); + auto in_e = EigenMatrix::From(in_t, framework::make_ddim({h, w})); + auto out_e = EigenVector::Flatten(out_t); out_e.device(place) = in_e.mean(Eigen::array({{0}})); } } @@ -54,9 +57,9 @@ template class SequenceAvgPoolGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Output("X"); - auto* in_g = context.Output(framework::GradVarName("X")); + auto* in = context.Input("X"); auto* out_g = context.Input(framework::GradVarName("Out")); + auto* in_g = context.Output(framework::GradVarName("X")); auto dims = in->dims(); auto lod = in->lod(); @@ -71,7 +74,7 @@ class SequenceAvgPoolGradKernel : public framework::OpKernel { int64_t h = static_cast(lod[0][i + 1] - lod[0][i]); auto in_g_e = EigenMatrix::From(in_g_t, {h, w}); auto out_g_e = EigenMatrix::From(out_g_t, {1, w}); - Eigen::DSizes bcast(h, w); + Eigen::DSizes bcast(h, 1); in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); } } diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 58ea40cb35679722cb7ae905c98e7b50906866b6..31724d98edff26c56f8fbf2992843ce76ee2cd85 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -47,17 +47,24 @@ def set_input(scope, op, inputs, place): if in_name in inputs: if in_dup: sub_in = inputs[in_name] - for sub_in_name, sub_in_array in sub_in: + for sub_in_name, sub_in_val in sub_in: var = scope.find_var(sub_in_name) tensor = var.get_tensor() + sub_in_array = sub_in_val[0] \ + if isinstance(sub_in_val, tuple) else sub_in_val tensor.set_dims(sub_in_array.shape) tensor.set(sub_in_array, place) + if isinstance(sub_in_val, tuple): + tensor.set_lod(sub_in_val[1]) else: var = scope.find_var(in_name) tensor = var.get_tensor() - arr = inputs[in_name] - tensor.set_dims(arr.shape) - tensor.set(arr, place) + in_val = inputs[in_name] + in_array = in_val[0] if isinstance(in_val, tuple) else in_val + tensor.set_dims(in_array.shape) + tensor.set(in_array, place) + if isinstance(in_val, tuple): + tensor.set_lod(in_val[1]) def set_output_grad(scope, op, outputs, place): diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/framework/tests/test_seq_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..cf864936af6361da1f16df3cfb759b468214b970 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_seq_pool.py @@ -0,0 +1,51 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestSeqAvgPool1D(OpTest): + def setUp(self): + self.op_type = 'sequence_avg_pool' + # one level, batch size is 4 + x = np.random.uniform(0.1, 1, [11, 23]).astype('float32') + lod = [[0, 4, 5, 8, 11]] + + out = np.zeros((4, 23)).astype('float32') + for i in range(4): + sub_x = x[lod[0][i]:lod[0][i + 1], :] + out[i] = sub_x.mean(axis=0) + + self.inputs = {'X': (x, lod)} + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +class TestSeqAvgPool2D(OpTest): + def setUp(self): + self.op_type = 'sequence_avg_pool' + # one level, batch size is 4 + x = np.random.uniform(0.1, 1, [13, 3, 17]).astype('float32') + lod = [[0, 4, 5, 8, 13]] + + out = np.zeros((4, 3, 17)).astype('float32') + for i in range(4): + sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) + out[i] = np.reshape(sub_x.mean(axis=0), (3, 17)) + + self.inputs = {'X': (x, lod)} + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +if __name__ == '__main__': + unittest.main()