diff --git a/paddle/gserver/layers/NormProjectionLayer.cpp b/paddle/gserver/layers/NormProjectionLayer.cpp index 03c6952c30b0ced99e832caea579512a5023b202..d6923c2192cf189749c9a2137e1bc63f1d4b0932 100644 --- a/paddle/gserver/layers/NormProjectionLayer.cpp +++ b/paddle/gserver/layers/NormProjectionLayer.cpp @@ -13,10 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "NormProjectionLayer.h" +#include "paddle/math/cross_map_normal_op.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" -#include "paddle/math/cross_map_normal_op.h" -#include "NormProjectionLayer.h" namespace paddle { size_t CMRProjectionNormLayer::getSize() { @@ -48,13 +47,23 @@ bool CMRProjectionNormLayer::init(const LayerMap& layerMap, CHECK_EQ(config_.inputs_size(), 1); if (useGpu_) { - normal_ = FunctionBase::funcRegistrar_.createByType( + forward_ = FunctionBase::funcRegistrar_.createByType( FUNC_NAME(CrossMapNormal, GPU)); } else { - normal_ = FunctionBase::funcRegistrar_.createByType( + forward_ = FunctionBase::funcRegistrar_.createByType( FUNC_NAME(CrossMapNormal, CPU)); } - normal_->init( + forward_->init( + FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_)); + + if (useGpu_) { + backward_ = FunctionBase::funcRegistrar_.createByType( + FUNC_NAME(CrossMapNormalGrad, GPU)); + } else { + backward_ = FunctionBase::funcRegistrar_.createByType( + FUNC_NAME(CrossMapNormalGrad, CPU)); + } + backward_->init( FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_)); return true; @@ -74,13 +83,13 @@ void CMRProjectionNormLayer::forward(PassType passType) { Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_); - Dims dims{(size_t)batchSize, - (size_t)channels_, - (size_t)imgSizeH_, - (size_t)imgSizeW_}; - normal_->calc( - {Tensor(input->getData(), dims)}, - {Tensor(outV->getData(), dims), Tensor(denoms_->getData(), dims)}, + dims_ = {(size_t)batchSize, + (size_t)channels_, + (size_t)imgSizeH_, + (size_t)imgSizeW_}; + forward_->calc( + {Tensor(input->getData(), dims_)}, + {Tensor(outV->getData(), dims_), Tensor(denoms_->getData(), dims_)}, {}); } @@ -96,6 +105,13 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) { MatrixPtr localOutV = getOutputValue(); MatrixPtr preOutV = inputLayers_[0]->getOutputValue(); + backward_->calc({Tensor(preOutV->getData(), dims_), + Tensor(localOutV->getData(), dims_), + Tensor(localGrad->getData(), dims_), + Tensor(denoms_->getData(), dims_)}, + {Tensor(preOutGrad->getData(), dims_)}, + {}); +#if 0 if (useGpu_) { CrossMapNormalGrad crossGrad; crossGrad(dynamic_cast(*preOutGrad), @@ -123,5 +139,6 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) { scale_, pow_); } +#endif } } // namespace paddle diff --git a/paddle/gserver/layers/NormProjectionLayer.h b/paddle/gserver/layers/NormProjectionLayer.h index 1dc3921283ca7d66310ef5394cc0a2433fcaae5a..82aa427f8d425da0d91251365a3762bd3e50700b 100644 --- a/paddle/gserver/layers/NormProjectionLayer.h +++ b/paddle/gserver/layers/NormProjectionLayer.h @@ -16,9 +16,8 @@ limitations under the License. */ #include #include "NormLayer.h" -#include "paddle/math/Matrix.h" #include "paddle/math/Function.h" -#include +#include "paddle/math/Matrix.h" namespace paddle { @@ -43,6 +42,8 @@ public: void backward(const UpdateCallback& callback = nullptr); protected: - FunctionBase* normal_; + Dims dims_; + FunctionBase* forward_; + FunctionBase* backward_; }; } // namespace paddle diff --git a/paddle/math/Function.h b/paddle/math/Function.h index f8fab972a6902086c11bb6c02bd2e81a7e3822ac..095584c0b19f7a0b7d8787a0bc6bbdd78d785eed 100644 --- a/paddle/math/Function.h +++ b/paddle/math/Function.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include -#include "paddle/utils/ClassRegistrar.h" #include "paddle/math/Matrix.h" +#include "paddle/utils/ClassRegistrar.h" namespace paddle { diff --git a/paddle/math/cross_map_normal_op.cpp b/paddle/math/cross_map_normal_op.cpp index e520351d2e3b82f913f59b24f5e8ce744dda1445..8547978c991603cb145107b5a3b529e7fe741d6c 100644 --- a/paddle/math/cross_map_normal_op.cpp +++ b/paddle/math/cross_map_normal_op.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "cross_map_normal_op.h" +#include "paddle/math/Vector.h" namespace paddle { @@ -56,66 +57,49 @@ void CrossMapNormal(real* outputs, } template <> -void CrossMapNormalGrad::operator()(CpuMatrix& inputsGrad, - CpuMatrix& inputsValue, - CpuMatrix& outputsGrad, - CpuMatrix& outputsValue, - CpuMatrix& denoms, - size_t channels, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - real scale, - real pow) { - CHECK(inputsGrad.isContiguous()); - CHECK(outputsGrad.isContiguous()); - CHECK(denoms.isContiguous()); - CHECK(inputsValue.isContiguous()); - CHECK(outputsValue.isContiguous()); - CHECK_EQ(inputsGrad.getHeight(), outputsGrad.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), outputsGrad.getWidth()); - CHECK_EQ(inputsGrad.getHeight(), denoms.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), denoms.getWidth()); - CHECK_EQ(inputsGrad.getHeight(), inputsValue.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), inputsValue.getWidth()); - CHECK_EQ(inputsGrad.getHeight(), outputsValue.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), outputsValue.getWidth()); - - size_t numSample = inputsGrad.getHeight(); - size_t numCols = inputsGrad.getWidth(); - size_t imageSize = imgSizeH * imgSizeW; - CHECK(imageSize * channels == numCols); - +void CrossMapNormalGrad(real* inputsGrad, + real* inputsValue, + real* outputsValue, + real* outputsGrad, + real* denoms, + size_t numSamples, + size_t channels, + size_t height, + size_t width, + size_t size, + real scale, + real pow) { + size_t oneSample = channels * height * width; std::function oneImage = [=](real* data, size_t offset) { - return CpuVector(imageSize, data + offset); + return CpuVector(height * width, data + offset); }; - const int start = -((int)sizeX) / 2; - const int end = (int)sizeX + start; + const int start = -((int)size) / 2; + const int end = (int)size + start; const real ratio = -(real)2 * scale * pow; - for (size_t i = 0; i < numSample; i++) { - size_t sOffset = i * numCols; - real* inputGradData = inputsGrad.getData() + sOffset; - real* inputData = inputsValue.getData() + sOffset; - real* denomData = denoms.getData() + sOffset; - real* outputGradData = outputsGrad.getData() + sOffset; - real* outputData = outputsValue.getData() + sOffset; + for (size_t i = 0; i < numSamples; i++) { + size_t sOffset = i * oneSample; + real* oneInputGrad = inputsGrad + sOffset; + real* oneInputValue = inputsValue + sOffset; + real* oneDenom = denoms + sOffset; + real* oneOutputGrad = outputsGrad + sOffset; + real* oneOutputValue = outputsValue + sOffset; for (int c = 0; c < (int)channels; c++) { - size_t cOffset = c * imageSize; - CpuVector inputGrad = oneImage(inputGradData, cOffset); - CpuVector inputValue = oneImage(inputData, cOffset); - CpuVector denom = oneImage(denomData, cOffset); - CpuVector outputGrad = oneImage(outputGradData, cOffset); + size_t cOffset = c * height * width; + CpuVector inputGrad = oneImage(oneInputGrad, cOffset); + CpuVector inputValue = oneImage(oneInputValue, cOffset); + CpuVector denom = oneImage(oneDenom, cOffset); + CpuVector outputGrad = oneImage(oneOutputGrad, cOffset); inputGrad = inputGrad + denom.pow(-pow) * outputGrad; for (int s = start; s < end; s++) { if (c + s >= 0 && c + s < (int)channels) { - size_t offset = (c + s) * imageSize; - CpuVector output = oneImage(outputData, offset); - CpuVector outputGrad = oneImage(outputGradData, offset); - CpuVector denom = oneImage(denomData, offset); + size_t offset = (c + s) * height * width; + CpuVector output = oneImage(oneOutputValue, offset); + CpuVector outputGrad = oneImage(oneOutputGrad, offset); + CpuVector denom = oneImage(oneDenom, offset); inputGrad += ((outputGrad * output * ratio) / denom) * inputValue; } @@ -124,6 +108,11 @@ void CrossMapNormalGrad::operator()(CpuMatrix& inputsGrad, } } +/** + * \param inputs[0] input value. + * \param outputs[0] output value. + * \param outputs[1] denoms. + */ template class CrossMapNormalFunc : public FunctionBase { public: @@ -169,7 +158,65 @@ private: real pow_; }; +/** + * \param inputs[0] input value. + * \param inputs[1] output value. + * \param inputs[2] output grad. + * \param inputs[3] denoms. + * \param outputs[0] input grad. + */ +template +class CrossMapNormalGradFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + size_ = config.get("size"); + scale_ = config.get("scale"); + pow_ = config.get("pow"); + } + + void calc(const Arguments& inputs, + const Arguments& outputs, + const Arguments& inouts) override { + CHECK_EQ(4, inputs.size()); + CHECK_EQ(1, outputs.size()); + CHECK_EQ(0, inouts.size()); + + CHECK_EQ(inputs[0].dims_.size(), 4); + for (size_t i = 0; i < inputs[0].dims_.size(); i++) { + CHECK_EQ(inputs[0].dims_[i], inputs[1].dims_[i]); + CHECK_EQ(inputs[0].dims_[i], inputs[2].dims_[i]); + CHECK_EQ(inputs[0].dims_[i], inputs[3].dims_[i]); + CHECK_EQ(inputs[0].dims_[i], outputs[0].dims_[i]); + } + + size_t samples = inputs[0].dims_[0]; + size_t channels = inputs[0].dims_[1]; + size_t height = inputs[0].dims_[2]; + size_t width = inputs[0].dims_[3]; + + CrossMapNormalGrad(outputs[0].getData(), + inputs[0].getData(), + inputs[1].getData(), + inputs[2].getData(), + inputs[3].getData(), + samples, + channels, + height, + width, + size_, + scale_, + pow_); + } + +private: + size_t size_; + real scale_; + real pow_; +}; + REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc); REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc); +REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc); +REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc); } // namespace paddle diff --git a/paddle/math/cross_map_normal_op.h b/paddle/math/cross_map_normal_op.h index ef9533485ec9cc3542a88ade461c36662c72552b..f065208084f1d46b2414246b7119ce7d2666e631 100644 --- a/paddle/math/cross_map_normal_op.h +++ b/paddle/math/cross_map_normal_op.h @@ -15,7 +15,6 @@ limitations under the License. */ #pragma once #include "Function.h" -#include "paddle/math/Matrix.h" namespace paddle { @@ -30,34 +29,19 @@ void CrossMapNormal(real* outputs, size_t size, real scale, real pow); -#if 0 -template -struct CrossMapNormal { - void operator()(typename MatrixT::type& outputs, - typename MatrixT::type& denoms, - typename MatrixT::type& inputs, - size_t channels, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - real scale, - real pow); -}; -#endif template -struct CrossMapNormalGrad { - void operator()(typename MatrixT::type& inputsGrad, - typename MatrixT::type& inputsValue, - typename MatrixT::type& outputsGrad, - typename MatrixT::type& outputsValue, - typename MatrixT::type& denoms, - size_t channels, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - real scale, - real pow); -}; +void CrossMapNormalGrad(real* inputsGrad, + real* inputsValue, + real* outputsValue, + real* outputsGrad, + real* denoms, + size_t numSamples, + size_t channels, + size_t height, + size_t width, + size_t size, + real scale, + real pow); } // namespace paddle diff --git a/paddle/math/cross_map_normal_op_gpu.cu b/paddle/math/cross_map_normal_op_gpu.cu index 9b929743449552215e29a96a31f0444a26b88b8d..6339c041948349a358b4a9c6e9a33dbe569bb5fe 100644 --- a/paddle/math/cross_map_normal_op_gpu.cu +++ b/paddle/math/cross_map_normal_op_gpu.cu @@ -131,48 +131,26 @@ __global__ void KeCMRNormDiff(size_t imageSize, const real* bottom_data, } template <> -void CrossMapNormalGrad::operator()(GpuMatrix& inputsGrad, - GpuMatrix& inputsValue, - GpuMatrix& outputsGrad, - GpuMatrix& outputsValue, - GpuMatrix& denoms, - size_t channels, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - real scale, - real pow) { - CHECK(inputsGrad.isContiguous()); - CHECK(outputsGrad.isContiguous()); - CHECK(denoms.isContiguous()); - CHECK(inputsValue.isContiguous()); - CHECK(outputsValue.isContiguous()); - CHECK_EQ(inputsGrad.getHeight(), outputsGrad.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), outputsGrad.getWidth()); - CHECK_EQ(inputsGrad.getHeight(), denoms.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), denoms.getWidth()); - CHECK_EQ(inputsGrad.getHeight(), inputsValue.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), inputsValue.getWidth()); - CHECK_EQ(inputsGrad.getHeight(), outputsValue.getHeight()); - CHECK_EQ(inputsGrad.getWidth(), outputsValue.getWidth()); - - size_t numSample = inputsGrad.getHeight(); - size_t numCols = inputsGrad.getWidth(); - CHECK(imgSizeH * imgSizeW * channels == numCols); - - size_t imageSize = numSample * imgSizeH * imgSizeW; - real* inputsGradData = inputsGrad.getData(); - real* inputsData = inputsValue.getData(); - real* denomsData = denoms.getData(); - real* outputsGradData = outputsGrad.getData(); - real* outputsData = outputsValue.getData(); +void CrossMapNormalGrad(real* inputsGrad, + real* inputsValue, + real* outputsValue, + real* outputsGrad, + real* denoms, + size_t numSamples, + size_t channels, + size_t height, + size_t width, + size_t size, + real scale, + real pow) { + size_t imageSize = numSamples * height * width; int blockSize = 1024; int gridSize = (imageSize + 1024 - 1) / 1024; KeCMRNormDiff <<>> - (imageSize, inputsData, outputsData, denomsData, outputsGradData, channels, - imgSizeH, imgSizeW, sizeX, -pow, 2.0f * pow * scale, inputsGradData); - CHECK_SYNC("KeCMRNormDiff"); + (imageSize, inputsValue, outputsValue, denoms, outputsGrad, channels, + height, width, size, -pow, 2.0f * pow * scale, inputsGrad); + CHECK_SYNC("CrossMapNormalGrad"); } } // namespace paddle diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 0341d757f31b95218b7d1408037ab2a5112e86bf..bc146514572a4df301d19cf0afa44ef527dd445d 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -19,12 +19,11 @@ limitations under the License. */ #include #include "TensorCheck.h" #include "paddle/gserver/tests/TestUtil.h" +#include "paddle/math/Function.h" #include "paddle/math/Matrix.h" #include "paddle/math/SparseMatrix.h" -#include "paddle/utils/Stat.h" -#include "TensorCheck.h" #include "paddle/math/cross_map_normal_op.h" -#include "paddle/math/Function.h" +#include "paddle/utils/Stat.h" #include "paddle/utils/Util.h" using namespace paddle; // NOLINT @@ -1282,12 +1281,6 @@ void testCrossMapNormalFwd( inputsGpu.copyFrom(inputs); outputsGpu.copyFrom(outputs); -#if 0 - FuncConfig config; - config.set("size", (size_t)sizeX); - config.set("scale", scale); - config.set("pow", pow); -#endif FunctionBase* cpu = FunctionBase::funcRegistrar_.createByType(FUNC_NAME(CrossMapNormal, CPU)); FunctionBase* gpu = @@ -1311,22 +1304,6 @@ void testCrossMapNormalFwd( {Tensor(inputsGpu.getData(), dims)}, {Tensor(outputsGpu.getData(), dims), Tensor(denomsGpu.getData(), dims)}, {}); -#if 0 - CrossMapNormal cpuCross; - cpuCross( - outputs, denoms, inputs, channels, imgSizeH, imgSizeW, sizeX, scale, pow); - - CrossMapNormal gpuCross; - gpuCross(outputsGpu, - denomsGpu, - inputsGpu, - channels, - imgSizeH, - imgSizeW, - sizeX, - scale, - pow); -#endif TensorCheckErr(outputs, outputsGpu); TensorCheckErr(denoms, denomsGpu); @@ -1381,6 +1358,35 @@ void testCrossMapNormalBwd( outputsValueGpu.copyFrom(outputsValue); inputsGradGpu.copyFrom(inputsGrad); + FunctionBase* cpu = FunctionBase::funcRegistrar_.createByType( + FUNC_NAME(CrossMapNormalGrad, CPU)); + FunctionBase* gpu = FunctionBase::funcRegistrar_.createByType( + FUNC_NAME(CrossMapNormalGrad, GPU)); + cpu->init(FuncConfig() + .set("size", (size_t)sizeX) + .set("scale", scale) + .set("pow", pow)); + gpu->init(FuncConfig() + .set("size", (size_t)sizeX) + .set("scale", scale) + .set("pow", pow)); + + Dims dims{ + (size_t)numSamples, (size_t)channels, (size_t)imgSizeH, (size_t)imgSizeW}; + cpu->calc({Tensor(inputsValue.getData(), dims), + Tensor(outputsValue.getData(), dims), + Tensor(outputsGrad.getData(), dims), + Tensor(denoms.getData(), dims)}, + {Tensor(inputsGrad.getData(), dims)}, + {}); + + gpu->calc({Tensor(inputsValueGpu.getData(), dims), + Tensor(outputsValueGpu.getData(), dims), + Tensor(outputsGradGpu.getData(), dims), + Tensor(denomsGpu.getData(), dims)}, + {Tensor(inputsGradGpu.getData(), dims)}, + {}); +#if 0 CrossMapNormalGrad cpuCross; cpuCross(inputsGrad, inputsValue, @@ -1406,6 +1412,7 @@ void testCrossMapNormalBwd( sizeX, scale, pow); +#endif TensorCheckErr(inputsGrad, inputsGradGpu); }