提交 d2d00106 编写于 作者: H hedaoyuan

add CrossMapNormalGradFunc

上级 9171ab0a
......@@ -13,10 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "NormProjectionLayer.h"
#include "paddle/math/cross_map_normal_op.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/math/cross_map_normal_op.h"
#include "NormProjectionLayer.h"
namespace paddle {
size_t CMRProjectionNormLayer::getSize() {
......@@ -48,13 +47,23 @@ bool CMRProjectionNormLayer::init(const LayerMap& layerMap,
CHECK_EQ(config_.inputs_size(), 1);
if (useGpu_) {
normal_ = FunctionBase::funcRegistrar_.createByType(
forward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormal, GPU));
} else {
normal_ = FunctionBase::funcRegistrar_.createByType(
forward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormal, CPU));
}
normal_->init(
forward_->init(
FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_));
if (useGpu_) {
backward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, GPU));
} else {
backward_ = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, CPU));
}
backward_->init(
FuncConfig().set("size", size_).set("scale", scale_).set("pow", pow_));
return true;
......@@ -74,13 +83,13 @@ void CMRProjectionNormLayer::forward(PassType passType) {
Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_);
Dims dims{(size_t)batchSize,
(size_t)channels_,
(size_t)imgSizeH_,
(size_t)imgSizeW_};
normal_->calc(
{Tensor(input->getData(), dims)},
{Tensor(outV->getData(), dims), Tensor(denoms_->getData(), dims)},
dims_ = {(size_t)batchSize,
(size_t)channels_,
(size_t)imgSizeH_,
(size_t)imgSizeW_};
forward_->calc(
{Tensor(input->getData(), dims_)},
{Tensor(outV->getData(), dims_), Tensor(denoms_->getData(), dims_)},
{});
}
......@@ -96,6 +105,13 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) {
MatrixPtr localOutV = getOutputValue();
MatrixPtr preOutV = inputLayers_[0]->getOutputValue();
backward_->calc({Tensor(preOutV->getData(), dims_),
Tensor(localOutV->getData(), dims_),
Tensor(localGrad->getData(), dims_),
Tensor(denoms_->getData(), dims_)},
{Tensor(preOutGrad->getData(), dims_)},
{});
#if 0
if (useGpu_) {
CrossMapNormalGrad<DEVICE_TYPE_GPU> crossGrad;
crossGrad(dynamic_cast<GpuMatrix&>(*preOutGrad),
......@@ -123,5 +139,6 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) {
scale_,
pow_);
}
#endif
}
} // namespace paddle
......@@ -16,9 +16,8 @@ limitations under the License. */
#include <vector>
#include "NormLayer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Function.h"
#include <vector>
#include "paddle/math/Matrix.h"
namespace paddle {
......@@ -43,6 +42,8 @@ public:
void backward(const UpdateCallback& callback = nullptr);
protected:
FunctionBase* normal_;
Dims dims_;
FunctionBase* forward_;
FunctionBase* backward_;
};
} // namespace paddle
......@@ -16,8 +16,8 @@ limitations under the License. */
#include <map>
#include <vector>
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/ClassRegistrar.h"
namespace paddle {
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "cross_map_normal_op.h"
#include "paddle/math/Vector.h"
namespace paddle {
......@@ -56,66 +57,49 @@ void CrossMapNormal<DEVICE_TYPE_CPU>(real* outputs,
}
template <>
void CrossMapNormalGrad<DEVICE_TYPE_CPU>::operator()(CpuMatrix& inputsGrad,
CpuMatrix& inputsValue,
CpuMatrix& outputsGrad,
CpuMatrix& outputsValue,
CpuMatrix& denoms,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t sizeX,
real scale,
real pow) {
CHECK(inputsGrad.isContiguous());
CHECK(outputsGrad.isContiguous());
CHECK(denoms.isContiguous());
CHECK(inputsValue.isContiguous());
CHECK(outputsValue.isContiguous());
CHECK_EQ(inputsGrad.getHeight(), outputsGrad.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsGrad.getWidth());
CHECK_EQ(inputsGrad.getHeight(), denoms.getHeight());
CHECK_EQ(inputsGrad.getWidth(), denoms.getWidth());
CHECK_EQ(inputsGrad.getHeight(), inputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), inputsValue.getWidth());
CHECK_EQ(inputsGrad.getHeight(), outputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsValue.getWidth());
size_t numSample = inputsGrad.getHeight();
size_t numCols = inputsGrad.getWidth();
size_t imageSize = imgSizeH * imgSizeW;
CHECK(imageSize * channels == numCols);
void CrossMapNormalGrad<DEVICE_TYPE_CPU>(real* inputsGrad,
real* inputsValue,
real* outputsValue,
real* outputsGrad,
real* denoms,
size_t numSamples,
size_t channels,
size_t height,
size_t width,
size_t size,
real scale,
real pow) {
size_t oneSample = channels * height * width;
std::function<CpuVector(real*, size_t)> oneImage = [=](real* data,
size_t offset) {
return CpuVector(imageSize, data + offset);
return CpuVector(height * width, data + offset);
};
const int start = -((int)sizeX) / 2;
const int end = (int)sizeX + start;
const int start = -((int)size) / 2;
const int end = (int)size + start;
const real ratio = -(real)2 * scale * pow;
for (size_t i = 0; i < numSample; i++) {
size_t sOffset = i * numCols;
real* inputGradData = inputsGrad.getData() + sOffset;
real* inputData = inputsValue.getData() + sOffset;
real* denomData = denoms.getData() + sOffset;
real* outputGradData = outputsGrad.getData() + sOffset;
real* outputData = outputsValue.getData() + sOffset;
for (size_t i = 0; i < numSamples; i++) {
size_t sOffset = i * oneSample;
real* oneInputGrad = inputsGrad + sOffset;
real* oneInputValue = inputsValue + sOffset;
real* oneDenom = denoms + sOffset;
real* oneOutputGrad = outputsGrad + sOffset;
real* oneOutputValue = outputsValue + sOffset;
for (int c = 0; c < (int)channels; c++) {
size_t cOffset = c * imageSize;
CpuVector inputGrad = oneImage(inputGradData, cOffset);
CpuVector inputValue = oneImage(inputData, cOffset);
CpuVector denom = oneImage(denomData, cOffset);
CpuVector outputGrad = oneImage(outputGradData, cOffset);
size_t cOffset = c * height * width;
CpuVector inputGrad = oneImage(oneInputGrad, cOffset);
CpuVector inputValue = oneImage(oneInputValue, cOffset);
CpuVector denom = oneImage(oneDenom, cOffset);
CpuVector outputGrad = oneImage(oneOutputGrad, cOffset);
inputGrad = inputGrad + denom.pow(-pow) * outputGrad;
for (int s = start; s < end; s++) {
if (c + s >= 0 && c + s < (int)channels) {
size_t offset = (c + s) * imageSize;
CpuVector output = oneImage(outputData, offset);
CpuVector outputGrad = oneImage(outputGradData, offset);
CpuVector denom = oneImage(denomData, offset);
size_t offset = (c + s) * height * width;
CpuVector output = oneImage(oneOutputValue, offset);
CpuVector outputGrad = oneImage(oneOutputGrad, offset);
CpuVector denom = oneImage(oneDenom, offset);
inputGrad += ((outputGrad * output * ratio) / denom) * inputValue;
}
......@@ -124,6 +108,11 @@ void CrossMapNormalGrad<DEVICE_TYPE_CPU>::operator()(CpuMatrix& inputsGrad,
}
}
/**
* \param inputs[0] input value.
* \param outputs[0] output value.
* \param outputs[1] denoms.
*/
template <DeviceType Device>
class CrossMapNormalFunc : public FunctionBase {
public:
......@@ -169,7 +158,65 @@ private:
real pow_;
};
/**
 * \brief FunctionBase wrapper around CrossMapNormalGrad<Device>.
 *
 * Every tensor must be 4-dimensional and all tensors must have identical
 * dims, which are read as [samples, channels, height, width].
 *
 * \param inputs[0] input value.
 * \param inputs[1] output value.
 * \param inputs[2] output grad.
 * \param inputs[3] denoms.
 * \param outputs[0] input grad.
 *
 * NOTE(review): the CPU implementation of CrossMapNormalGrad in this file
 * adds its result to the existing contents of the input-grad buffer instead
 * of overwriting it; presumably callers rely on that accumulation -- confirm.
 */
template <DeviceType Device>
class CrossMapNormalGradFunc : public FunctionBase {
public:
  /// Reads the "size", "scale" and "pow" entries from \p config.
  void init(const FuncConfig& config) override {
    size_ = config.get<size_t>("size");
    scale_ = config.get<real>("scale");
    pow_ = config.get<real>("pow");
  }

  /// Validates the argument counts and shapes, then forwards the raw data
  /// pointers to CrossMapNormalGrad<Device>.
  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(4, inputs.size());
    CHECK_EQ(1, outputs.size());
    CHECK_EQ(0, inouts.size());

    // Check the rank of every tensor before indexing into its dims_;
    // otherwise a tensor with fewer than four dims would be read out of
    // bounds by the element-wise comparison below.
    const size_t numDims = 4;
    CHECK_EQ(inputs[0].dims_.size(), numDims);
    CHECK_EQ(inputs[1].dims_.size(), numDims);
    CHECK_EQ(inputs[2].dims_.size(), numDims);
    CHECK_EQ(inputs[3].dims_.size(), numDims);
    CHECK_EQ(outputs[0].dims_.size(), numDims);

    // All five tensors must share exactly the same shape.
    for (size_t i = 0; i < numDims; i++) {
      CHECK_EQ(inputs[0].dims_[i], inputs[1].dims_[i]);
      CHECK_EQ(inputs[0].dims_[i], inputs[2].dims_[i]);
      CHECK_EQ(inputs[0].dims_[i], inputs[3].dims_[i]);
      CHECK_EQ(inputs[0].dims_[i], outputs[0].dims_[i]);
    }

    size_t samples = inputs[0].dims_[0];
    size_t channels = inputs[0].dims_[1];
    size_t height = inputs[0].dims_[2];
    size_t width = inputs[0].dims_[3];

    // Argument order follows the CrossMapNormalGrad signature:
    // inputsGrad, inputsValue, outputsValue, outputsGrad, denoms.
    CrossMapNormalGrad<Device>(outputs[0].getData(),
                               inputs[0].getData(),
                               inputs[1].getData(),
                               inputs[2].getData(),
                               inputs[3].getData(),
                               samples,
                               channels,
                               height,
                               width,
                               size_,
                               scale_,
                               pow_);
  }

private:
  size_t size_;  // value of the "size" config entry
  real scale_;   // value of the "scale" config entry
  real pow_;     // value of the "pow" config entry
};
// Register the forward (CrossMapNormal) and backward (CrossMapNormalGrad)
// function classes for both device types. Callers in this change obtain them
// via FunctionBase::funcRegistrar_.createByType(FUNC_NAME(<name>, <device>)).
REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc);
REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc);
} // namespace paddle
......@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once
#include "Function.h"
#include "paddle/math/Matrix.h"
namespace paddle {
......@@ -30,34 +29,19 @@ void CrossMapNormal(real* outputs,
size_t size,
real scale,
real pow);
#if 0
template <DeviceType Device>
struct CrossMapNormal {
void operator()(typename MatrixT<Device>::type& outputs,
typename MatrixT<Device>::type& denoms,
typename MatrixT<Device>::type& inputs,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t sizeX,
real scale,
real pow);
};
#endif
template <DeviceType Device>
struct CrossMapNormalGrad {
void operator()(typename MatrixT<Device>::type& inputsGrad,
typename MatrixT<Device>::type& inputsValue,
typename MatrixT<Device>::type& outputsGrad,
typename MatrixT<Device>::type& outputsValue,
typename MatrixT<Device>::type& denoms,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t sizeX,
real scale,
real pow);
};
/**
 * \brief Backward computation of cross-map normalization.
 *
 * All five buffers are indexed with the same layout: sample i starts at
 * offset i * channels * height * width, and channel c of a sample starts at
 * offset c * height * width (as done by the CPU specialization).
 *
 * \param inputsGrad    gradient with respect to the input. The CPU
 *                      specialization adds its result to the existing
 *                      contents. NOTE(review): confirm the GPU kernel
 *                      behaves the same way.
 * \param inputsValue   input value of the forward computation.
 * \param outputsValue  output value of the forward computation.
 * \param outputsGrad   gradient with respect to the output.
 * \param denoms        denominators stored by the forward computation.
 * \param numSamples    number of samples.
 * \param channels      number of channels per sample.
 * \param height        image height.
 * \param width         image width.
 * \param size          number of neighbouring channels in the window; the
 *                      CPU specialization visits channels
 *                      [c - size / 2, c - size / 2 + size) and skips those
 *                      outside [0, channels).
 * \param scale         scale factor of the normalization.
 * \param pow           exponent of the normalization.
 */
void CrossMapNormalGrad(real* inputsGrad,
real* inputsValue,
real* outputsValue,
real* outputsGrad,
real* denoms,
size_t numSamples,
size_t channels,
size_t height,
size_t width,
size_t size,
real scale,
real pow);
} // namespace paddle
......@@ -131,48 +131,26 @@ __global__ void KeCMRNormDiff(size_t imageSize, const real* bottom_data,
}
/**
 * GPU specialization of CrossMapNormalGrad. It launches the KeCMRNormDiff
 * kernel on STREAM_DEFAULT, passing -pow and 2 * pow * scale as the kernel's
 * exponent and ratio arguments, with the input-grad buffer as the last
 * argument (presumably the kernel's output -- the kernel body is not
 * visible here).
 */
template <>
void CrossMapNormalGrad<DEVICE_TYPE_GPU>::operator()(GpuMatrix& inputsGrad,
GpuMatrix& inputsValue,
GpuMatrix& outputsGrad,
GpuMatrix& outputsValue,
GpuMatrix& denoms,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t sizeX,
real scale,
real pow) {
CHECK(inputsGrad.isContiguous());
CHECK(outputsGrad.isContiguous());
CHECK(denoms.isContiguous());
CHECK(inputsValue.isContiguous());
CHECK(outputsValue.isContiguous());
CHECK_EQ(inputsGrad.getHeight(), outputsGrad.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsGrad.getWidth());
CHECK_EQ(inputsGrad.getHeight(), denoms.getHeight());
CHECK_EQ(inputsGrad.getWidth(), denoms.getWidth());
CHECK_EQ(inputsGrad.getHeight(), inputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), inputsValue.getWidth());
CHECK_EQ(inputsGrad.getHeight(), outputsValue.getHeight());
CHECK_EQ(inputsGrad.getWidth(), outputsValue.getWidth());
size_t numSample = inputsGrad.getHeight();
size_t numCols = inputsGrad.getWidth();
CHECK(imgSizeH * imgSizeW * channels == numCols);
size_t imageSize = numSample * imgSizeH * imgSizeW;
real* inputsGradData = inputsGrad.getData();
real* inputsData = inputsValue.getData();
real* denomsData = denoms.getData();
real* outputsGradData = outputsGrad.getData();
real* outputsData = outputsValue.getData();
void CrossMapNormalGrad<DEVICE_TYPE_GPU>(real* inputsGrad,
real* inputsValue,
real* outputsValue,
real* outputsGrad,
real* denoms,
size_t numSamples,
size_t channels,
size_t height,
size_t width,
size_t size,
real scale,
real pow) {
// Work-item count: one per (sample, row, column) position; the channel
// count is passed to the kernel separately.
size_t imageSize = numSamples * height * width;
// Blocks of 1024 threads; the grid size is rounded up so that at least
// imageSize threads are launched.
int blockSize = 1024;
int gridSize = (imageSize + 1024 - 1) / 1024;
KeCMRNormDiff <<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
(imageSize, inputsData, outputsData, denomsData, outputsGradData, channels,
imgSizeH, imgSizeW, sizeX, -pow, 2.0f * pow * scale, inputsGradData);
CHECK_SYNC("KeCMRNormDiff");
(imageSize, inputsValue, outputsValue, denoms, outputsGrad, channels,
height, width, size, -pow, 2.0f * pow * scale, inputsGrad);
CHECK_SYNC("CrossMapNormalGrad");
}
} // namespace paddle
......@@ -19,12 +19,11 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "TensorCheck.h"
#include "paddle/gserver/tests/TestUtil.h"
#include "paddle/math/Function.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Stat.h"
#include "TensorCheck.h"
#include "paddle/math/cross_map_normal_op.h"
#include "paddle/math/Function.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
using namespace paddle; // NOLINT
......@@ -1282,12 +1281,6 @@ void testCrossMapNormalFwd(
inputsGpu.copyFrom(inputs);
outputsGpu.copyFrom(outputs);
#if 0
FuncConfig config;
config.set("size", (size_t)sizeX);
config.set("scale", scale);
config.set("pow", pow);
#endif
FunctionBase* cpu =
FunctionBase::funcRegistrar_.createByType(FUNC_NAME(CrossMapNormal, CPU));
FunctionBase* gpu =
......@@ -1311,22 +1304,6 @@ void testCrossMapNormalFwd(
{Tensor(inputsGpu.getData(), dims)},
{Tensor(outputsGpu.getData(), dims), Tensor(denomsGpu.getData(), dims)},
{});
#if 0
CrossMapNormal<DEVICE_TYPE_CPU> cpuCross;
cpuCross(
outputs, denoms, inputs, channels, imgSizeH, imgSizeW, sizeX, scale, pow);
CrossMapNormal<DEVICE_TYPE_GPU> gpuCross;
gpuCross(outputsGpu,
denomsGpu,
inputsGpu,
channels,
imgSizeH,
imgSizeW,
sizeX,
scale,
pow);
#endif
TensorCheckErr(outputs, outputsGpu);
TensorCheckErr(denoms, denomsGpu);
......@@ -1381,6 +1358,35 @@ void testCrossMapNormalBwd(
outputsValueGpu.copyFrom(outputsValue);
inputsGradGpu.copyFrom(inputsGrad);
FunctionBase* cpu = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, CPU));
FunctionBase* gpu = FunctionBase::funcRegistrar_.createByType(
FUNC_NAME(CrossMapNormalGrad, GPU));
cpu->init(FuncConfig()
.set("size", (size_t)sizeX)
.set("scale", scale)
.set("pow", pow));
gpu->init(FuncConfig()
.set("size", (size_t)sizeX)
.set("scale", scale)
.set("pow", pow));
Dims dims{
(size_t)numSamples, (size_t)channels, (size_t)imgSizeH, (size_t)imgSizeW};
cpu->calc({Tensor(inputsValue.getData(), dims),
Tensor(outputsValue.getData(), dims),
Tensor(outputsGrad.getData(), dims),
Tensor(denoms.getData(), dims)},
{Tensor(inputsGrad.getData(), dims)},
{});
gpu->calc({Tensor(inputsValueGpu.getData(), dims),
Tensor(outputsValueGpu.getData(), dims),
Tensor(outputsGradGpu.getData(), dims),
Tensor(denomsGpu.getData(), dims)},
{Tensor(inputsGradGpu.getData(), dims)},
{});
#if 0
CrossMapNormalGrad<DEVICE_TYPE_CPU> cpuCross;
cpuCross(inputsGrad,
inputsValue,
......@@ -1406,6 +1412,7 @@ void testCrossMapNormalBwd(
sizeX,
scale,
pow);
#endif
TensorCheckErr(inputsGrad, inputsGradGpu);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册