提交 d2d00106 编写于 作者: H hedaoyuan

add CrossMapNormalGradFunc

上级 9171ab0a
...@@ -13,10 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,10 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "NormProjectionLayer.h" #include "NormProjectionLayer.h"
#include "paddle/math/cross_map_normal_op.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
#include "paddle/math/cross_map_normal_op.h"
#include "NormProjectionLayer.h"
namespace paddle { namespace paddle {
size_t CMRProjectionNormLayer::getSize() { size_t CMRProjectionNormLayer::getSize() {
...@@ -48,13 +47,23 @@ bool CMRProjectionNormLayer::init(const LayerMap& layerMap, ...@@ -48,13 +47,23 @@ bool CMRProjectionNormLayer::init(const LayerMap& layerMap,
CHECK_EQ(config_.inputs_size(), 1); CHECK_EQ(config_.inputs_size(), 1);
if (useGpu_) { if (useGpu_) {
normal_ = FunctionBase::funcRegistrar_.createByType( forward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormal, GPU)); FUNC_NAME(CrossMapNormal, GPU));
} else { } else {
normal_ = FunctionBase::funcRegistrar_.createByType( forward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormal, CPU)); FUNC_NAME(CrossMapNormal, CPU));
} }
normal_->init( forward_->init(
FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_));
if (useGpu_) {
backward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, GPU));
} else {
backward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, CPU));
}
backward_->init(
FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_)); FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_));
return true; return true;
...@@ -74,13 +83,13 @@ void CMRProjectionNormLayer::forward(PassType passType) { ...@@ -74,13 +83,13 @@ void CMRProjectionNormLayer::forward(PassType passType) {
Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_); Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_);
Dims dims{(size_t)batchSize, dims_ = {(size_t)batchSize,
(size_t)channels_, (size_t)channels_,
(size_t)imgSizeH_, (size_t)imgSizeH_,
(size_t)imgSizeW_}; (size_t)imgSizeW_};
normal_->calc( forward_->calc(
{Tensor(input->getData(), dims)}, {Tensor(input->getData(), dims_)},
{Tensor(outV->getData(), dims), Tensor(denoms_->getData(), dims)}, {Tensor(outV->getData(), dims_), Tensor(denoms_->getData(), dims_)},
{}); {});
} }
...@@ -96,6 +105,13 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) { ...@@ -96,6 +105,13 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) {
MatrixPtr localOutV = getOutputValue(); MatrixPtr localOutV = getOutputValue();
MatrixPtr preOutV = inputLayers_[0]->getOutputValue(); MatrixPtr preOutV = inputLayers_[0]->getOutputValue();
backward_->calc({Tensor(preOutV->getData(), dims_),
Tensor(localOutV->getData(), dims_),
Tensor(localGrad->getData(), dims_),
Tensor(denoms_->getData(), dims_)},
{Tensor(preOutGrad->getData(), dims_)},
{});
#if 0
if (useGpu_) { if (useGpu_) {
CrossMapNormalGrad<DEVICE_TYPE_GPU> crossGrad; CrossMapNormalGrad<DEVICE_TYPE_GPU> crossGrad;
crossGrad(dynamic_cast<GpuMatrix&>(*preOutGrad), crossGrad(dynamic_cast<GpuMatrix&>(*preOutGrad),
...@@ -123,5 +139,6 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) { ...@@ -123,5 +139,6 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) {
scale_, scale_,
pow_); pow_);
} }
#endif
} }
} // namespace paddle } // namespace paddle
...@@ -16,9 +16,8 @@ limitations under the License. */ ...@@ -16,9 +16,8 @@ limitations under the License. */
#include <vector> #include <vector>
#include "NormLayer.h" #include "NormLayer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Function.h" #include "paddle/math/Function.h"
#include <vector> #include "paddle/math/Matrix.h"
namespace paddle { namespace paddle {
...@@ -43,6 +42,8 @@ public: ...@@ -43,6 +42,8 @@ public:
void backward(const UpdateCallback& callback = nullptr); void backward(const UpdateCallback& callback = nullptr);
protected: protected:
FunctionBase* normal_; Dims dims_;
FunctionBase* forward_;
FunctionBase* backward_;
}; };
} // namespace paddle } // namespace paddle
...@@ -16,8 +16,8 @@ limitations under the License. */ ...@@ -16,8 +16,8 @@ limitations under the License. */
#include <map> #include <map>
#include <vector> #include <vector>
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/utils/ClassRegistrar.h"
namespace paddle { namespace paddle {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "cross_map_normal_op.h" #include "cross_map_normal_op.h"
#include "paddle/math/Vector.h"
namespace paddle { namespace paddle {
...@@ -56,66 +57,49 @@ void CrossMapNormal<DEVICE_TYPE_CPU>(real* outputs, ...@@ -56,66 +57,49 @@ void CrossMapNormal<DEVICE_TYPE_CPU>(real* outputs,
} }
template <> template <>
void CrossMapNormalGrad<DEVICE_TYPE_CPU>::operator()(CpuMatrix& inputsGrad, void CrossMapNormalGrad<DEVICE_TYPE_CPU>(real* inputsGrad,
CpuMatrix& inputsValue, real* inputsValue,
CpuMatrix& outputsGrad, real* outputsValue,
CpuMatrix& outputsValue, real* outputsGrad,
CpuMatrix& denoms, real* denoms,
size_t numSamples,
size_t channels, size_t channels,
size_t imgSizeH, size_t height,
size_t imgSizeW, size_t width,
size_t sizeX, size_t size,
real scale, real scale,
real pow) { real pow) {
CHECK(inputsGrad.isContiguous()); size_t oneSample = channels * height * width;
CHECK(outputsGrad.isContiguous());
CHECK(denoms.isContiguous());
CHECK(inputsValue.isContiguous());
CHECK(outputsValue.isContiguous());
CHECK_EQ(inputsGrad.getHeight(), outputsGrad.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsGrad.getWidth());
CHECK_EQ(inputsGrad.getHeight(), denoms.getHeight());
CHECK_EQ(inputsGrad.getWidth(), denoms.getWidth());
CHECK_EQ(inputsGrad.getHeight(), inputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), inputsValue.getWidth());
CHECK_EQ(inputsGrad.getHeight(), outputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsValue.getWidth());
size_t numSample = inputsGrad.getHeight();
size_t numCols = inputsGrad.getWidth();
size_t imageSize = imgSizeH * imgSizeW;
CHECK(imageSize * channels == numCols);
std::function<CpuVector(real*, size_t)> oneImage = [=](real* data, std::function<CpuVector(real*, size_t)> oneImage = [=](real* data,
size_t offset) { size_t offset) {
return CpuVector(imageSize, data + offset); return CpuVector(height * width, data + offset);
}; };
const int start = -((int)sizeX) / 2; const int start = -((int)size) / 2;
const int end = (int)sizeX + start; const int end = (int)size + start;
const real ratio = -(real)2 * scale * pow; const real ratio = -(real)2 * scale * pow;
for (size_t i = 0; i < numSample; i++) { for (size_t i = 0; i < numSamples; i++) {
size_t sOffset = i * numCols; size_t sOffset = i * oneSample;
real* inputGradData = inputsGrad.getData() + sOffset; real* oneInputGrad = inputsGrad + sOffset;
real* inputData = inputsValue.getData() + sOffset; real* oneInputValue = inputsValue + sOffset;
real* denomData = denoms.getData() + sOffset; real* oneDenom = denoms + sOffset;
real* outputGradData = outputsGrad.getData() + sOffset; real* oneOutputGrad = outputsGrad + sOffset;
real* outputData = outputsValue.getData() + sOffset; real* oneOutputValue = outputsValue + sOffset;
for (int c = 0; c < (int)channels; c++) { for (int c = 0; c < (int)channels; c++) {
size_t cOffset = c * imageSize; size_t cOffset = c * height * width;
CpuVector inputGrad = oneImage(inputGradData, cOffset); CpuVector inputGrad = oneImage(oneInputGrad, cOffset);
CpuVector inputValue = oneImage(inputData, cOffset); CpuVector inputValue = oneImage(oneInputValue, cOffset);
CpuVector denom = oneImage(denomData, cOffset); CpuVector denom = oneImage(oneDenom, cOffset);
CpuVector outputGrad = oneImage(outputGradData, cOffset); CpuVector outputGrad = oneImage(oneOutputGrad, cOffset);
inputGrad = inputGrad + denom.pow(-pow) * outputGrad; inputGrad = inputGrad + denom.pow(-pow) * outputGrad;
for (int s = start; s < end; s++) { for (int s = start; s < end; s++) {
if (c + s >= 0 && c + s < (int)channels) { if (c + s >= 0 && c + s < (int)channels) {
size_t offset = (c + s) * imageSize; size_t offset = (c + s) * height * width;
CpuVector output = oneImage(outputData, offset); CpuVector output = oneImage(oneOutputValue, offset);
CpuVector outputGrad = oneImage(outputGradData, offset); CpuVector outputGrad = oneImage(oneOutputGrad, offset);
CpuVector denom = oneImage(denomData, offset); CpuVector denom = oneImage(oneDenom, offset);
inputGrad += ((outputGrad * output * ratio) / denom) * inputValue; inputGrad += ((outputGrad * output * ratio) / denom) * inputValue;
} }
...@@ -124,6 +108,11 @@ void CrossMapNormalGrad<DEVICE_TYPE_CPU>::operator()(CpuMatrix& inputsGrad, ...@@ -124,6 +108,11 @@ void CrossMapNormalGrad<DEVICE_TYPE_CPU>::operator()(CpuMatrix& inputsGrad,
} }
} }
/**
* \param inputs[0] input value.
* \param outputs[0] output value.
* \param outputs[1] denoms.
*/
template <DeviceType Device> template <DeviceType Device>
class CrossMapNormalFunc : public FunctionBase { class CrossMapNormalFunc : public FunctionBase {
public: public:
...@@ -169,7 +158,65 @@ private: ...@@ -169,7 +158,65 @@ private:
real pow_; real pow_;
}; };
/**
 * \brief Function wrapper that validates its tensor arguments and forwards
 *        them to CrossMapNormalGrad<Device>.
 *
 * init() reads the configuration keys "size", "scale" and "pow".
 *
 * \param inputs[0] input value.
 * \param inputs[1] output value.
 * \param inputs[2] output grad.
 * \param inputs[3] denoms.
 * \param outputs[0] input grad.
 *
 * Every tensor must be 4-dimensional and all of them must have identical
 * dimensions, which calc() reads as (samples, channels, height, width).
 */
template <DeviceType Device>
class CrossMapNormalGradFunc : public FunctionBase {
public:
  /// Caches the "size", "scale" and "pow" settings used by later calc() calls.
  void init(const FuncConfig& config) override {
    size_ = config.get<size_t>("size");
    scale_ = config.get<real>("scale");
    pow_ = config.get<real>("pow");
  }

  /// Checks argument counts and shapes, then invokes CrossMapNormalGrad.
  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(4, inputs.size());
    CHECK_EQ(1, outputs.size());
    CHECK_EQ(0, inouts.size());

    // Verify the rank of EVERY tensor before indexing dims_[i] below;
    // checking only inputs[0] would let a lower-rank tensor be read out of
    // bounds in the per-dimension comparison.
    const size_t rank = 4;
    for (size_t k = 0; k < inputs.size(); k++) {
      CHECK_EQ(inputs[k].dims_.size(), rank);
    }
    CHECK_EQ(outputs[0].dims_.size(), rank);

    // All tensors must agree with inputs[0] in every dimension.
    for (size_t i = 0; i < rank; i++) {
      CHECK_EQ(inputs[0].dims_[i], inputs[1].dims_[i]);
      CHECK_EQ(inputs[0].dims_[i], inputs[2].dims_[i]);
      CHECK_EQ(inputs[0].dims_[i], inputs[3].dims_[i]);
      CHECK_EQ(inputs[0].dims_[i], outputs[0].dims_[i]);
    }

    size_t samples = inputs[0].dims_[0];
    size_t channels = inputs[0].dims_[1];
    size_t height = inputs[0].dims_[2];
    size_t width = inputs[0].dims_[3];

    // Argument order follows the CrossMapNormalGrad signature:
    // (inputsGrad, inputsValue, outputsValue, outputsGrad, denoms, ...).
    CrossMapNormalGrad<Device>(outputs[0].getData(),
                               inputs[0].getData(),
                               inputs[1].getData(),
                               inputs[2].getData(),
                               inputs[3].getData(),
                               samples,
                               channels,
                               height,
                               width,
                               size_,
                               scale_,
                               pow_);
  }

private:
  size_t size_;  // value of config key "size"
  real scale_;   // value of config key "scale"
  real pow_;     // value of config key "pow"
};
REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc); REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc); REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc);
} // namespace paddle } // namespace paddle
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once #pragma once
#include "Function.h" #include "Function.h"
#include "paddle/math/Matrix.h"
namespace paddle { namespace paddle {
...@@ -30,34 +29,19 @@ void CrossMapNormal(real* outputs, ...@@ -30,34 +29,19 @@ void CrossMapNormal(real* outputs,
size_t size, size_t size,
real scale, real scale,
real pow); real pow);
#if 0
template <DeviceType Device>
struct CrossMapNormal {
void operator()(typename MatrixT<Device>::type& outputs,
typename MatrixT<Device>::type& denoms,
typename MatrixT<Device>::type& inputs,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t sizeX,
real scale,
real pow);
};
#endif
template <DeviceType Device> template <DeviceType Device>
struct CrossMapNormalGrad { void CrossMapNormalGrad(real* inputsGrad,
void operator()(typename MatrixT<Device>::type& inputsGrad, real* inputsValue,
typename MatrixT<Device>::type& inputsValue, real* outputsValue,
typename MatrixT<Device>::type& outputsGrad, real* outputsGrad,
typename MatrixT<Device>::type& outputsValue, real* denoms,
typename MatrixT<Device>::type& denoms, size_t numSamples,
size_t channels, size_t channels,
size_t imgSizeH, size_t height,
size_t imgSizeW, size_t width,
size_t sizeX, size_t size,
real scale, real scale,
real pow); real pow);
};
} // namespace paddle } // namespace paddle
...@@ -131,48 +131,26 @@ __global__ void KeCMRNormDiff(size_t imageSize, const real* bottom_data, ...@@ -131,48 +131,26 @@ __global__ void KeCMRNormDiff(size_t imageSize, const real* bottom_data,
} }
template <> template <>
void CrossMapNormalGrad<DEVICE_TYPE_GPU>::operator()(GpuMatrix& inputsGrad, void CrossMapNormalGrad<DEVICE_TYPE_GPU>(real* inputsGrad,
GpuMatrix& inputsValue, real* inputsValue,
GpuMatrix& outputsGrad, real* outputsValue,
GpuMatrix& outputsValue, real* outputsGrad,
GpuMatrix& denoms, real* denoms,
size_t numSamples,
size_t channels, size_t channels,
size_t imgSizeH, size_t height,
size_t imgSizeW, size_t width,
size_t sizeX, size_t size,
real scale, real scale,
real pow) { real pow) {
CHECK(inputsGrad.isContiguous()); size_t imageSize = numSamples * height * width;
CHECK(outputsGrad.isContiguous());
CHECK(denoms.isContiguous());
CHECK(inputsValue.isContiguous());
CHECK(outputsValue.isContiguous());
CHECK_EQ(inputsGrad.getHeight(), outputsGrad.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsGrad.getWidth());
CHECK_EQ(inputsGrad.getHeight(), denoms.getHeight());
CHECK_EQ(inputsGrad.getWidth(), denoms.getWidth());
CHECK_EQ(inputsGrad.getHeight(), inputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), inputsValue.getWidth());
CHECK_EQ(inputsGrad.getHeight(), outputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsValue.getWidth());
size_t numSample = inputsGrad.getHeight();
size_t numCols = inputsGrad.getWidth();
CHECK(imgSizeH * imgSizeW * channels == numCols);
size_t imageSize = numSample * imgSizeH * imgSizeW;
real* inputsGradData = inputsGrad.getData();
real* inputsData = inputsValue.getData();
real* denomsData = denoms.getData();
real* outputsGradData = outputsGrad.getData();
real* outputsData = outputsValue.getData();
int blockSize = 1024; int blockSize = 1024;
int gridSize = (imageSize + 1024 - 1) / 1024; int gridSize = (imageSize + 1024 - 1) / 1024;
KeCMRNormDiff <<<gridSize, blockSize, 0, STREAM_DEFAULT>>> KeCMRNormDiff <<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
(imageSize, inputsData, outputsData, denomsData, outputsGradData, channels, (imageSize, inputsValue, outputsValue, denoms, outputsGrad, channels,
imgSizeH, imgSizeW, sizeX, -pow, 2.0f * pow * scale, inputsGradData); height, width, size, -pow, 2.0f * pow * scale, inputsGrad);
CHECK_SYNC("KeCMRNormDiff"); CHECK_SYNC("CrossMapNormalGrad");
} }
} // namespace paddle } // namespace paddle
...@@ -19,12 +19,11 @@ limitations under the License. */ ...@@ -19,12 +19,11 @@ limitations under the License. */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "TensorCheck.h" #include "TensorCheck.h"
#include "paddle/gserver/tests/TestUtil.h" #include "paddle/gserver/tests/TestUtil.h"
#include "paddle/math/Function.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h" #include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Stat.h"
#include "TensorCheck.h"
#include "paddle/math/cross_map_normal_op.h" #include "paddle/math/cross_map_normal_op.h"
#include "paddle/math/Function.h" #include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h" #include "paddle/utils/Util.h"
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
...@@ -1282,12 +1281,6 @@ void testCrossMapNormalFwd( ...@@ -1282,12 +1281,6 @@ void testCrossMapNormalFwd(
inputsGpu.copyFrom(inputs); inputsGpu.copyFrom(inputs);
outputsGpu.copyFrom(outputs); outputsGpu.copyFrom(outputs);
#if 0
FuncConfig config;
config.set("size", (size_t)sizeX);
config.set("scale", scale);
config.set("pow", pow);
#endif
FunctionBase* cpu = FunctionBase* cpu =
FunctionBase::funcRegistrar_.createByType(FUNC_NAME(CrossMapNormal, CPU)); FunctionBase::funcRegistrar_.createByType(FUNC_NAME(CrossMapNormal, CPU));
FunctionBase* gpu = FunctionBase* gpu =
...@@ -1311,22 +1304,6 @@ void testCrossMapNormalFwd( ...@@ -1311,22 +1304,6 @@ void testCrossMapNormalFwd(
{Tensor(inputsGpu.getData(), dims)}, {Tensor(inputsGpu.getData(), dims)},
{Tensor(outputsGpu.getData(), dims), Tensor(denomsGpu.getData(), dims)}, {Tensor(outputsGpu.getData(), dims), Tensor(denomsGpu.getData(), dims)},
{}); {});
#if 0
CrossMapNormal<DEVICE_TYPE_CPU> cpuCross;
cpuCross(
outputs, denoms, inputs, channels, imgSizeH, imgSizeW, sizeX, scale, pow);
CrossMapNormal<DEVICE_TYPE_GPU> gpuCross;
gpuCross(outputsGpu,
denomsGpu,
inputsGpu,
channels,
imgSizeH,
imgSizeW,
sizeX,
scale,
pow);
#endif
TensorCheckErr(outputs, outputsGpu); TensorCheckErr(outputs, outputsGpu);
TensorCheckErr(denoms, denomsGpu); TensorCheckErr(denoms, denomsGpu);
...@@ -1381,6 +1358,35 @@ void testCrossMapNormalBwd( ...@@ -1381,6 +1358,35 @@ void testCrossMapNormalBwd(
outputsValueGpu.copyFrom(outputsValue); outputsValueGpu.copyFrom(outputsValue);
inputsGradGpu.copyFrom(inputsGrad); inputsGradGpu.copyFrom(inputsGrad);
FunctionBase* cpu = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, CPU));
FunctionBase* gpu = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, GPU));
cpu->init(FuncConfig()
.set("size", (size_t)sizeX)
.set("scale", scale)
.set("pow", pow));
gpu->init(FuncConfig()
.set("size", (size_t)sizeX)
.set("scale", scale)
.set("pow", pow));
Dims dims{
(size_t)numSamples, (size_t)channels, (size_t)imgSizeH, (size_t)imgSizeW};
cpu->calc({Tensor(inputsValue.getData(), dims),
Tensor(outputsValue.getData(), dims),
Tensor(outputsGrad.getData(), dims),
Tensor(denoms.getData(), dims)},
{Tensor(inputsGrad.getData(), dims)},
{});
gpu->calc({Tensor(inputsValueGpu.getData(), dims),
Tensor(outputsValueGpu.getData(), dims),
Tensor(outputsGradGpu.getData(), dims),
Tensor(denomsGpu.getData(), dims)},
{Tensor(inputsGradGpu.getData(), dims)},
{});
#if 0
CrossMapNormalGrad<DEVICE_TYPE_CPU> cpuCross; CrossMapNormalGrad<DEVICE_TYPE_CPU> cpuCross;
cpuCross(inputsGrad, cpuCross(inputsGrad,
inputsValue, inputsValue,
...@@ -1406,6 +1412,7 @@ void testCrossMapNormalBwd( ...@@ -1406,6 +1412,7 @@ void testCrossMapNormalBwd(
sizeX, sizeX,
scale, scale,
pow); pow);
#endif
TensorCheckErr(inputsGrad, inputsGradGpu); TensorCheckErr(inputsGrad, inputsGradGpu);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册