提交 198164ad 编写于 作者: X xzl

Use the ExpandConvLayer forward and backward implementations; add an explanatory comment for the class

上级 064dc888
...@@ -81,7 +81,6 @@ public: ...@@ -81,7 +81,6 @@ public:
int paddingH, int paddingH,
int paddingW, int paddingW,
T* colData, T* colData,
T* multiplierData,
T* filterGrad) {} T* filterGrad) {}
}; };
...@@ -247,7 +246,6 @@ public: ...@@ -247,7 +246,6 @@ public:
real* outputGrad = inputs[0].data<real>(); real* outputGrad = inputs[0].data<real>();
real* inputData = inputs[1].data<real>(); real* inputData = inputs[1].data<real>();
real* multiplierData = inputs[2].data<real>();
real* filterGrad = outputs[0].data<real>(); real* filterGrad = outputs[0].data<real>();
int size = int size =
...@@ -273,7 +271,6 @@ public: ...@@ -273,7 +271,6 @@ public:
paddingH(), paddingH(),
paddingW(), paddingW(),
colData, colData,
multiplierData,
filterGrad); filterGrad);
} }
}; };
......
...@@ -148,9 +148,7 @@ public: ...@@ -148,9 +148,7 @@ public:
int paddingH, int paddingH,
int paddingW, int paddingW,
T* colData, T* colData,
T* multiplierData,
T* filterGrad); T* filterGrad);
};
}; // namespace paddle
} // namespace paddle } // namespace paddle
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "DepthwiseConvOp.h" #include "DepthwiseConvOp.h"
#include "GemmFunctor.h" #include "GemmFunctor.h"
#include "paddle/math/BaseMatrix.h"
namespace paddle { namespace paddle {
// CUDA kernel to compute the depthwise convolution forward pass // CUDA kernel to compute the depthwise convolution forward pass
...@@ -266,7 +267,6 @@ public: ...@@ -266,7 +267,6 @@ public:
int paddingH, int paddingH,
int paddingW, int paddingW,
T* colData, T* colData,
T* multiplierData,
T* filterGrad){ T* filterGrad){
int colDataSize = inputChannels * filterHeight * filterWidth * outputHeight * outputWidth; int colDataSize = inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
...@@ -276,6 +276,7 @@ public: ...@@ -276,6 +276,7 @@ public:
size_t blockY = (blocks+512-1)/512; size_t blockY = (blocks+512-1)/512;
dim3 threads(1024, 1); dim3 threads(1024, 1);
dim3 grid(blockX, blockY); dim3 grid(blockX, blockY);
BaseMatrix filterGradMatrix(inputChannels * filterHeight * filterWidth, 1, filterGrad, false, true);
for(int i = 0; i < batchSize; i++) { for(int i = 0; i < batchSize; i++) {
ConvolutionDepthwiseFilterBackward<T> ConvolutionDepthwiseFilterBackward<T>
...@@ -298,25 +299,12 @@ public: ...@@ -298,25 +299,12 @@ public:
paddingW, paddingW,
colData colData
); );
GemmFunctor<DEVICE_TYPE_GPU, real> gemm;
int M = colDataSize / outputHeight / outputWidth; int M = colDataSize / outputHeight / outputWidth;
int N = 1;
int K = outputHeight * outputWidth; int K = outputHeight * outputWidth;
gemm(CblasNoTrans,
CblasNoTrans, BaseMatrix colMatrix(M, K, colData, false, true);
M, filterGradMatrix.sumRows(colMatrix, (T)1.0, (T)1.0);
N,
K,
(T)1.0,
colData,
K,
multiplierData,
N,
(T)1.0,
filterGrad,
N);
} }
//gemv
} }
}; };
......
...@@ -29,18 +29,10 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap, ...@@ -29,18 +29,10 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap,
inputShape_.resize(numInputs); inputShape_.resize(numInputs);
filterShape_.resize(numInputs); filterShape_.resize(numInputs);
outputShape_.resize(numInputs); outputShape_.resize(numInputs);
multiplierShape_.resize(numInputs);
weightMultiplier_.resize(numInputs);
for (int i = 0; i < config_.inputs_size(); i++) { for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]}; std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
Matrix::resizeOrCreate(weightMultiplier_[i],
(size_t)outputH_[i] * (size_t)outputW_[i],
(size_t)1,
false,
useGpu_);
weightMultiplier_[i]->one();
createFunction(forward_, createFunction(forward_,
"DepthwiseConv", "DepthwiseConv",
FuncConfig() FuncConfig()
...@@ -65,100 +57,4 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap, ...@@ -65,100 +57,4 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap,
return true; return true;
} }
// i is the index of input layers
#define BACKWARD_INPUT(i, inputs, outputs) \
backward_[2 * i]->calc(inputs, outputs)
#define BACKWARD_FILTER(i, inputs, outputs) \
backward_[2 * i + 1]->calc(inputs, outputs)
// compute the depthwise convolution forward pass
void DepthwiseConvLayer::forward(PassType passType) {
Layer::forward(passType);
size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
resetOutput(batchSize, getOutputSize());
// Calculate the shape of the input, output, and filter.
for (size_t i = 0; i < inputLayers_.size(); ++i) {
inputShape_[i] = TensorShape({(size_t)batchSize,
(size_t)channels_[i],
(size_t)imgSizeH_[i],
(size_t)imgSizeW_[i]});
multiplierShape_[i] =
TensorShape({(size_t)outputH_[i] * (size_t)outputW_[i], (size_t)1});
filterShape_[i] = TensorShape({(size_t)groups_[i],
(size_t)numFilters_ / groups_[i],
(size_t)channels_[i] / groups_[i],
(size_t)filterSizeY_[i],
(size_t)filterSize_[i]});
outputShape_[i] = TensorShape({(size_t)batchSize,
(size_t)numFilters_,
(size_t)outputH_[i],
(size_t)outputW_[i]});
}
// Calculate the output value.
for (size_t i = 0; i < inputLayers_.size(); ++i) {
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getInputValue(i), inputShape_[i]);
inputs.addArg(*weights_[i]->getW(), filterShape_[i]);
outputs.addArg(
*getOutputValue(), outputShape_[i], i == 0 ? ASSIGN_TO : ADD_TO);
forward_[i]->calc(inputs, outputs);
}
/* add the bias-vector */
if (biases_.get()) {
if (sharedBiases_) {
addSharedBias();
} else {
addUnsharedBias();
}
}
/* activation */
forwardActivation();
}
// compute the depthwise convolution backprop.
void DepthwiseConvLayer::backward(const UpdateCallback &callback) {
backwardActivation();
MatrixPtr outGrad = getOutputGrad();
if (biases_ && biases_->getWGrad()) {
bpropBiases(outGrad);
/* Increasing the number of gradient */
biases_->getParameterPtr()->incUpdate(callback);
}
// Calculate the input grad and filter grad.
for (size_t i = 0; i < inputLayers_.size(); ++i) {
if (getInputGrad(i)) {
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getOutputGrad(), outputShape_[i]);
inputs.addArg(*weights_[i]->getW(), filterShape_[i]);
outputs.addArg(*getInputGrad(i), inputShape_[i], ADD_TO);
BACKWARD_INPUT(i, inputs, outputs);
}
if (weights_[i]->getWGrad()) {
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getOutputGrad(), outputShape_[i]);
inputs.addArg(*getInputValue(i), inputShape_[i]);
inputs.addArg(*weightMultiplier_[i], multiplierShape_[i]);
// weight_multiplier
outputs.addArg(*weights_[i]->getWGrad(), filterShape_[i], ADD_TO);
BACKWARD_FILTER(i, inputs, outputs);
/* Increasing the number of gradient */
weights_[i]->getParameterPtr()->incUpdate(callback);
}
}
}
} // namespace paddle } // namespace paddle
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <vector> #include <vector>
#include "ExpandConvBaseLayer.h" #include "ExpandConvLayer.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
namespace paddle { namespace paddle {
...@@ -26,25 +26,15 @@ namespace paddle { ...@@ -26,25 +26,15 @@ namespace paddle {
* The config file api is img_depthwise_conv_layer. * The config file api is img_depthwise_conv_layer.
*/ */
class DepthwiseConvLayer : public ExpandConvBaseLayer { class DepthwiseConvLayer : public ExpandConvLayer {
public: public:
explicit DepthwiseConvLayer(const LayerConfig& config) explicit DepthwiseConvLayer(const LayerConfig& config)
: ExpandConvBaseLayer(config) {} : ExpandConvLayer(config) {}
~DepthwiseConvLayer() {} ~DepthwiseConvLayer() {}
bool init(const LayerMap& layerMap, bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
protected:
std::vector<TensorShape> inputShape_;
std::vector<TensorShape> filterShape_;
std::vector<TensorShape> outputShape_;
std::vector<TensorShape> multiplierShape_;
std::vector<MatrixPtr> weightMultiplier_;
}; };
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册