提交 d11e2b40 编写于 作者: H hedaoyuan

Remove some useless code

上级 558e8692
......@@ -240,62 +240,6 @@ extern void hl_avgpool_backward(const int frameCnt,
real* backGrad,
const int outStride);
/**
 * @brief Cross-map response normalization, forward pass.
 *
 * @param[in]  frameCnt  batch size of input image.
 * @param[in]  in        input data.
 * @param[out] scale     scale buffer (non-const; the GPU implementation fills
 *                       it before computing the output).
 * @param[out] out       output data.
 * @param[in]  channels  number of channels.
 * @param[in]  height    image height.
 * @param[in]  width     image width.
 * @param[in]  sizeX     size of the normalization window across channels.
 * @param[in]  alpha     multiplier applied to the windowed sum of squares.
 * @param[in]  beta      exponent applied to the scale buffer.
 *
 * @note The GPU implementation computes out = in * pow(scale, beta), so the
 *       caller is expected to pass the already-negated power as beta.
 */
extern void hl_CMRNorm_forward(size_t frameCnt,
const real* in,
real* scale,
real* out,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta);
/**
 * @brief Cross-map response normalization, backward pass.
 *
 * @param[in]     frameCnt  batch size of input image.
 * @param[in]     inV       input data (forward input).
 * @param[in]     scale     scale buffer produced by the forward pass.
 * @param[in]     outV      output value (forward output).
 * @param[in]     outDiff   output grad.
 * @param[in,out] inDiff    input grad; the GPU implementation accumulates
 *                          into it (+=) rather than overwriting it.
 * @param[in]     channels  number of channels.
 * @param[in]     height    image height.
 * @param[in]     width     image width.
 * @param[in]     sizeX     size of the normalization window across channels.
 * @param[in]     alpha     exponent applied to the scale buffer (the GPU
 *                          implementation forwards it as negative_beta).
 * @param[in]     beta      multiplier of the accumulated correction term
 *                          (the GPU implementation forwards it as
 *                          cache_ratio).
 *
 */
extern void hl_CMRNorm_backward(size_t frameCnt,
const real* inV,
const real* scale,
const real* outV,
const real* outDiff,
real* inDiff,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta);
/**
* @brief Bilinear interpolation forward.
*
......
......@@ -117,30 +117,6 @@ inline void hl_avgpool_backward(const int frameCnt,
real* backGrad,
const int outStride) {}
// Empty inline definition of hl_CMRNorm_forward: accepts the full argument
// list and does nothing.
// NOTE(review): presumably the placeholder used when the CUDA implementation
// is not compiled in -- confirm against the build configuration.
inline void hl_CMRNorm_forward(size_t frameCnt,
const real* in,
real* scale,
real* out,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta) {}
// Empty inline definition of hl_CMRNorm_backward: accepts the full argument
// list and does nothing (inDiff is left untouched).
// NOTE(review): presumably the placeholder used when the CUDA implementation
// is not compiled in -- confirm against the build configuration.
inline void hl_CMRNorm_backward(size_t frameCnt,
const real* inV,
const real* scale,
const real* outV,
const real* outDiff,
real* inDiff,
size_t channels,
size_t height,
size_t width,
size_t sizeX,
real alpha,
real beta) {}
inline void hl_bilinear_forward(const real* inData,
const size_t inImgH,
const size_t inImgW,
......
......@@ -381,126 +381,6 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
CHECK_SYNC("hl_avgpool_backward failed");
}
/**
 * @brief Fill the scale buffer used by cross-map response normalization.
 *
 * Each thread owns one (sample, row, column) position and walks along the
 * channel axis while maintaining a running sum of squared inputs. For every
 * channel c it stores
 *     scale[c] = 1 + alpha * sum(in[k]^2),  k in [c - pre_pad, c + post_pad]
 * with the window clipped to the valid channel range.
 *
 * Data is addressed as [sample][channel][row][column]: consecutive channels
 * of one position are height * width elements apart.
 *
 * Launch layout: 1-D grid of 1-D blocks providing at least imageSize threads;
 * surplus threads do nothing. No shared memory is used.
 *
 * @param[in]  imageSize  number of (sample, row, column) positions.
 * @param[in]  in         input data.
 * @param[out] scale      scale buffer, addressed like in.
 * @param[in]  channels   number of channels.
 * @param[in]  height     image height.
 * @param[in]  width      image width.
 * @param[in]  size       number of neighbouring channels in the window.
 * @param[in]  alpha      multiplier applied to the windowed sum of squares.
 */
__global__ void KeCMRNormFillScale(size_t imageSize, const real* in,
                                   real* scale, size_t channels,
                                   size_t height, size_t width, size_t size,
                                   real alpha) {
  // Flat index and element offsets are kept in size_t: with 32-bit ints they
  // wrap around once the tensor holds more than 2^31 elements.
  const size_t idx =
      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (idx < imageSize) {
    const size_t w = idx % width;
    const size_t h = (idx / width) % height;
    const size_t n = idx / width / height;
    // Offset of channel 0 at position (n, h, w).
    const size_t offset = (n * channels * height + h) * width + w;
    in += offset;
    scale += offset;
    // Distance between two consecutive channels of the same position.
    const size_t step = height * width;
    const int pre_pad = (size - 1) / 2;
    const int post_pad = size - pre_pad - 1;
    real accum = 0;
    int index = 0;
    // `index` is the leading edge of the sliding window; the channel being
    // written trails it by post_pad.
    while (index < channels + post_pad) {
      if (index < channels) {
        // Channel entering the window.
        accum += in[index * step] * in[index * step];
      }
      if (index >= size) {
        // Channel leaving the window.
        accum -= in[(index - size) * step] * in[(index - size) * step];
      }
      if (index >= post_pad) {
        // Use a `real` one instead of the double literal `1.` so float builds
        // do not promote the addition to double precision.
        scale[(index - post_pad) * step] =
            static_cast<real>(1) + accum * alpha;
      }
      ++index;
    }
  }
}
/**
 * @brief Element-wise output of cross-map response normalization:
 *        out[i] = in[i] * pow(scale[i], negative_beta).
 *
 * Launch layout: 1-D grid of 1-D blocks providing at least inputSize threads;
 * surplus threads do nothing. No shared memory is used.
 *
 * @param[in]  inputSize      number of elements in in / scale / out.
 * @param[in]  in             input data.
 * @param[in]  scale          scale buffer.
 * @param[in]  negative_beta  exponent applied to each scale element.
 * @param[out] out            output data.
 */
__global__ void KeCMRNormOutput(size_t inputSize, const real* in,
                                const real* scale, real negative_beta,
                                real* out) {
  // size_t index: a 32-bit int would go negative past 2^31 elements and the
  // bounds test below would then silently skip those elements.
  const size_t index =
      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (index < inputSize) {
    out[index] = in[index] * pow(scale[index], negative_beta);
  }
}
/**
 * @brief Cross-map response normalization, forward pass (GPU).
 *
 * Runs two kernels on STREAM_DEFAULT: KeCMRNormFillScale fills `scale`, then
 * KeCMRNormOutput computes out = in * pow(scale, beta).
 *
 * @param[in]  frameCnt  batch size of input image.
 * @param[in]  in        input data.
 * @param[out] scale     scale buffer filled by the first kernel.
 * @param[out] out       output data.
 * @param[in]  channels  number of channels.
 * @param[in]  height    image height.
 * @param[in]  width     image width.
 * @param[in]  sizeX     size of the normalization window across channels.
 * @param[in]  alpha     multiplier applied to the windowed sum of squares.
 * @param[in]  beta      exponent handed to KeCMRNormOutput as negative_beta,
 *                       i.e. the caller passes the already-negated power.
 */
void hl_CMRNorm_forward(size_t frameCnt, const real* in, real* scale,
                        real* out, size_t channels,
                        size_t height, size_t width, size_t sizeX,
                        real alpha, real beta) {
  const int blockSize = 1024;
  const size_t imageSize = frameCnt * height * width;
  const size_t inputSize = imageSize * channels;

  // Empty input: there is nothing to compute, and launching a grid with zero
  // blocks is an invalid launch configuration.
  if (inputSize == 0) {
    return;
  }

  // One thread per (sample, row, column) position.
  int gridSize = static_cast<int>((imageSize + blockSize - 1) / blockSize);
  KeCMRNormFillScale<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
      (imageSize, in, scale, channels, height, width, sizeX, alpha);

  // One thread per element.
  gridSize = static_cast<int>((inputSize + blockSize - 1) / blockSize);
  KeCMRNormOutput<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
      (inputSize, in, scale, beta, out);
  CHECK_SYNC("hl_CMRNorm_forward");
}
/**
 * @brief Backward kernel of cross-map response normalization.
 *
 * Each thread owns one (sample, row, column) position and walks along the
 * channel axis while maintaining
 *     accum = sum(top_diff[k] * top_data[k] / scale[k]),
 *             k in [c - pre_pad, c + post_pad] (clipped to valid channels).
 * For every channel c it accumulates
 *     bottom_diff[c] += top_diff[c] * pow(scale[c], negative_beta)
 *                       - cache_ratio * bottom_data[c] * accum.
 *
 * Data is addressed as [sample][channel][row][column]: consecutive channels
 * of one position are height * width elements apart.
 *
 * Launch layout: 1-D grid of 1-D blocks providing at least imageSize threads;
 * surplus threads do nothing. No shared memory is used.
 *
 * @param[in]     imageSize      number of (sample, row, column) positions.
 * @param[in]     bottom_data    forward input.
 * @param[in]     top_data       forward output.
 * @param[in]     scale          scale buffer from the forward pass.
 * @param[in]     top_diff       gradient w.r.t. the forward output.
 * @param[in]     channels       number of channels.
 * @param[in]     height         image height.
 * @param[in]     width          image width.
 * @param[in]     size           number of neighbouring channels in the window.
 * @param[in]     negative_beta  exponent applied to scale.
 * @param[in]     cache_ratio    multiplier of the accumulated term.
 * @param[in,out] bottom_diff    gradient w.r.t. the forward input
 *                               (accumulated into, not overwritten).
 */
__global__ void KeCMRNormDiff(size_t imageSize, const real* bottom_data,
                              const real* top_data, const real* scale,
                              const real* top_diff, size_t channels,
                              size_t height, size_t width, size_t size,
                              real negative_beta, real cache_ratio,
                              real* bottom_diff) {
  // Flat index and element offsets are kept in size_t: with 32-bit ints they
  // wrap around once the tensor holds more than 2^31 elements.
  const size_t idx =
      static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (idx < imageSize) {
    const size_t w = idx % width;
    const size_t h = (idx / width) % height;
    const size_t n = idx / width / height;
    // Offset of channel 0 at position (n, h, w).
    const size_t offset = (n * channels * height + h) * width + w;
    bottom_data += offset;
    top_data += offset;
    scale += offset;
    top_diff += offset;
    bottom_diff += offset;
    // Distance between two consecutive channels of the same position.
    const size_t step = height * width;
    const int pre_pad = size - (size + 1) / 2;
    const int post_pad = size - pre_pad - 1;
    int index = 0;
    real accum = 0;
    // `index` is the leading edge of the sliding window; the channel being
    // updated trails it by post_pad.
    while (index < channels + post_pad) {
      if (index < channels) {
        // Channel entering the window.
        accum += top_diff[index * step] * top_data[index * step] /
                 scale[index * step];
      }
      if (index >= size) {
        // Channel leaving the window.
        accum -= top_diff[(index - size) * step] *
                 top_data[(index - size) * step] /
                 scale[(index - size) * step];
      }
      if (index >= post_pad) {
        bottom_diff[(index - post_pad) * step] +=
            top_diff[(index - post_pad) * step] *
            pow(scale[(index - post_pad) * step], negative_beta) -
            cache_ratio * bottom_data[(index - post_pad) * step] * accum;
      }
      ++index;
    }
  }
}
/**
 * @brief Cross-map response normalization, backward pass (GPU).
 *
 * Launches KeCMRNormDiff on STREAM_DEFAULT with one thread per
 * (sample, row, column) position; the kernel accumulates into inDiff.
 *
 * @param[in]     frameCnt  batch size of input image.
 * @param[in]     inV       forward input.
 * @param[in]     scale     scale buffer from the forward pass.
 * @param[in]     outV      forward output.
 * @param[in]     outDiff   gradient w.r.t. the forward output.
 * @param[in,out] inDiff    gradient w.r.t. the forward input (accumulated).
 * @param[in]     channels  number of channels.
 * @param[in]     height    image height.
 * @param[in]     width     image width.
 * @param[in]     sizeX     size of the normalization window across channels.
 * @param[in]     alpha     handed to the kernel as negative_beta (exponent
 *                          applied to scale), despite its name.
 * @param[in]     beta      handed to the kernel as cache_ratio (multiplier of
 *                          the accumulated term), despite its name.
 */
void hl_CMRNorm_backward(size_t frameCnt, const real* inV,
                         const real* scale,
                         const real* outV, const real* outDiff,
                         real *inDiff, size_t channels,
                         size_t height, size_t width, size_t sizeX,
                         real alpha, real beta) {
  const int blockSize = 1024;
  const size_t imageSize = frameCnt * height * width;

  // Empty input: there is nothing to compute, and launching a grid with zero
  // blocks is an invalid launch configuration.
  if (imageSize == 0) {
    return;
  }

  const int gridSize =
      static_cast<int>((imageSize + blockSize - 1) / blockSize);
  KeCMRNormDiff <<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
      (imageSize, inV, outV, scale, outDiff, channels,
       height, width, sizeX, alpha, beta, inDiff);
  CHECK_SYNC("hl_CMRNorm_backward");
}
__global__ void KeBilinearInterpFw(const real* in,
const size_t inImgH,
const size_t inImgW,
......
......@@ -110,34 +110,5 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) {
Tensor(denoms_->getData(), dims_)},
{Tensor(preOutGrad->getData(), dims_)},
{});
#if 0
if (useGpu_) {
CrossMapNormalGrad<DEVICE_TYPE_GPU> crossGrad;
crossGrad(dynamic_cast<GpuMatrix&>(*preOutGrad),
dynamic_cast<GpuMatrix&>(*preOutV),
dynamic_cast<GpuMatrix&>(*localGrad),
dynamic_cast<GpuMatrix&>(*localOutV),
dynamic_cast<GpuMatrix&>(*denoms_),
channels_,
imgSizeH_,
imgSizeW_,
size_,
scale_,
pow_);
} else {
CrossMapNormalGrad<DEVICE_TYPE_CPU> crossGrad;
crossGrad(dynamic_cast<CpuMatrix&>(*preOutGrad),
dynamic_cast<CpuMatrix&>(*preOutV),
dynamic_cast<CpuMatrix&>(*localGrad),
dynamic_cast<CpuMatrix&>(*localOutV),
dynamic_cast<CpuMatrix&>(*denoms_),
channels_,
imgSizeH_,
imgSizeW_,
size_,
scale_,
pow_);
}
#endif
}
} // namespace paddle
......@@ -1265,69 +1265,6 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad,
outGrad.getStride());
}
/**
 * Cross-map normalization forward pass on the GPU.
 *
 * Validates that input, denoms and this matrix share one shape whose width
 * equals imgSizeH * imgSizeW * channels, then hands the raw buffers to
 * hl_CMRNorm_forward. The result is written into this matrix (data_) and the
 * scale buffer into denoms.
 */
void GpuMatrix::crossMapNormalFwd(Matrix& input,
                                  size_t imgSizeH,
                                  size_t imgSizeW,
                                  Matrix& denoms,
                                  size_t channels,
                                  size_t sizeX,
                                  float scale,
                                  float pow) {
  // One matrix row per sample.
  const size_t batchSize = input.getHeight();
  size_t height = imgSizeH;
  size_t width = imgSizeW;

  // Each row must hold exactly one channels x height x width image.
  CHECK(height * width * channels == input.getWidth());
  // denoms, input and this matrix must all have identical dimensions.
  CHECK(denoms.getHeight() == input.getHeight() &&
        denoms.getWidth() == input.getWidth() && input.getHeight() == height_ &&
        input.getWidth() == width_);

  hl_CMRNorm_forward(batchSize,
                     input.getData(),
                     denoms.getData(),
                     data_,
                     channels,
                     height,
                     width,
                     sizeX,
                     scale,
                     -pow);  // the low-level routine takes the negated power
}
/**
 * Cross-map normalization backward pass on the GPU.
 *
 * Validates that preOutV, denoms, localGrad and this matrix agree in shape,
 * then hands the raw buffers to hl_CMRNorm_backward with this matrix (data_)
 * as the input-gradient buffer.
 */
void GpuMatrix::crossMapNormalBwd(Matrix& localGrad,
                                  Matrix& denoms,
                                  Matrix& preOutV,
                                  Matrix& localOutV,
                                  size_t channels,
                                  size_t imgSizeH,
                                  size_t imgSizeW,
                                  size_t sizeX,
                                  float scale,
                                  float pow) {
  // One matrix row per sample.
  const size_t batchSize = preOutV.getHeight();
  size_t height = imgSizeH;
  size_t width = imgSizeW;

  // Each row must hold exactly one channels x height x width image.
  CHECK(width * height * channels == preOutV.getWidth());
  // denoms, preOutV and this matrix must all have identical dimensions.
  CHECK(denoms.getHeight() == preOutV.getHeight() &&
        denoms.getWidth() == preOutV.getWidth() &&
        preOutV.getHeight() == height_ && preOutV.getWidth() == width_);
  // localGrad must match denoms as well.
  CHECK(denoms.getHeight() == localGrad.getHeight() &&
        denoms.getWidth() == localGrad.getWidth());

  hl_CMRNorm_backward(batchSize,
                      preOutV.getData(),
                      denoms.getData(),
                      localOutV.getData(),
                      localGrad.getData(),
                      data_,
                      channels,
                      height,
                      width,
                      sizeX,
                      -pow,                 // exponent applied to the scale
                      2.0f * pow * scale);  // ratio of the accumulated term
}
void GpuMatrix::maxSequenceForward(Matrix& input,
const IVector& sequence,
IVector& index) {
......@@ -2219,119 +2156,6 @@ void CpuMatrix::avgPoolBackward(Matrix& input,
}
}
/**
 * Cross-map normalization forward pass on the CPU.
 *
 * For every sample, accumulates into each channel row of denoms the rows of
 * a per-sample temporary taken from the neighbouring channels inside the
 * window, then finishes with denoms.add(scale, 1), pow2(denoms, -pow) and
 * dotMul(input) on this matrix.
 */
void CpuMatrix::crossMapNormalFwd(Matrix& input,
                                  size_t imgSizeH,
                                  size_t imgSizeW,
                                  Matrix& denoms,
                                  size_t channels,
                                  size_t sizeX,
                                  float scale,
                                  float pow) {
  // All three matrices must be contiguous ...
  CHECK(isContiguous());
  CHECK(input.isContiguous());
  CHECK(denoms.isContiguous());
  // ... and have identical dimensions.
  CHECK_EQ(getHeight(), input.getHeight());
  CHECK_EQ(getWidth(), input.getWidth());
  CHECK_EQ(getHeight(), denoms.getHeight());
  CHECK_EQ(getWidth(), denoms.getWidth());

  size_t numSample = input.getHeight();
  size_t numCols = input.getWidth();
  size_t height = imgSizeH;
  size_t width = imgSizeW;
  // Each row must hold exactly one channels x height x width image.
  CHECK(height * width * channels == numCols);

  // TODO(hedaoyuan): once the TensorExpress code is committed, rework this
  // to get rid of the temporary memory.
  // `squared` and `squaredFlat` share one buffer: the former views it as
  // channels x (height * width), the latter as a single row.
  CpuMatrix squared(channels, height * width);
  CpuMatrix squaredFlat(squared.getData(), 1, channels * height * width);

  denoms.zero();

  // Half-open channel window [ch + windowBegin, ch + windowEnd).
  const int windowBegin = -((int)sizeX - 1) / 2;
  const int windowEnd = (int)sizeX + windowBegin;
  const int numChannels = (int)channels;

  for (size_t sample = 0; sample < numSample; ++sample) {
    // Presumably stores the element-wise square of this sample into the
    // temporary -- square2 is defined elsewhere.
    input.subMatrix(sample, 1)->square2(squaredFlat);
    CpuMatrix sampleDenoms(
        denoms.subMatrix(sample, 1)->getData(), channels, height * width);
    for (int ch = 0; ch < numChannels; ++ch) {
      for (int neighbor = ch + windowBegin; neighbor < ch + windowEnd;
           ++neighbor) {
        // Skip window positions that fall outside the channel range.
        if (neighbor < 0 || neighbor >= numChannels) {
          continue;
        }
        sampleDenoms.subMatrix(ch, 1)->add(*squared.subMatrix(neighbor, 1));
      }
    }
  }

  denoms.add(scale, (real)1);
  this->pow2(denoms, -pow);
  this->dotMul(input);
}
/**
 * Cross-map normalization backward pass on the CPU.
 *
 * This matrix is the input-gradient buffer and is accumulated into. Per
 * sample and channel it first adds a term built from the output gradient and
 * denoms raised to -pow, then one correction term for every in-range
 * neighbouring channel of the window, each scaled by -2 * scale * pow.
 */
void CpuMatrix::crossMapNormalBwd(Matrix& localGrad,
                                  Matrix& denoms,
                                  Matrix& preOutV,
                                  Matrix& localOutV,
                                  size_t channels,
                                  size_t imgSizeH,
                                  size_t imgSizeW,
                                  size_t sizeX,
                                  float scale,
                                  float pow) {
  // Every matrix involved must be contiguous ...
  CHECK(isContiguous());
  CHECK(localGrad.isContiguous());
  CHECK(denoms.isContiguous());
  CHECK(preOutV.isContiguous());
  CHECK(localOutV.isContiguous());
  // ... and have the same dimensions as this matrix.
  CHECK_EQ(getHeight(), localGrad.getHeight());
  CHECK_EQ(getWidth(), localGrad.getWidth());
  CHECK_EQ(getHeight(), denoms.getHeight());
  CHECK_EQ(getWidth(), denoms.getWidth());
  CHECK_EQ(getHeight(), preOutV.getHeight());
  CHECK_EQ(getWidth(), preOutV.getWidth());
  CHECK_EQ(getHeight(), localOutV.getHeight());
  CHECK_EQ(getWidth(), localOutV.getWidth());

  size_t numSample = getHeight();
  size_t numCols = getWidth();
  size_t height = imgSizeH;
  size_t width = imgSizeW;
  // Each row must hold exactly one channels x height x width image.
  CHECK(height * width * channels == numCols);

  // TODO(hedaoyuan): once the TensorExpress code is committed, rework this
  // to get rid of the temporary memory.
  CpuMatrix scratch(1, height * width);

  // Half-open channel window [ch + windowBegin, ch + windowEnd).
  const int windowBegin = -((int)sizeX) / 2;
  const int windowEnd = (int)sizeX + windowBegin;
  const int numChannels = (int)channels;
  const real ratio = -(real)2 * scale * pow;

  for (size_t sample = 0; sample < numSample; ++sample) {
    // channels x (height * width) views over this sample's row in each
    // matrix.
    CpuMatrix inGrad(
        this->subMatrix(sample, 1)->getData(), channels, height * width);
    CpuMatrix outGrad(
        localGrad.subMatrix(sample, 1)->getData(), channels, height * width);
    CpuMatrix inValue(
        preOutV.subMatrix(sample, 1)->getData(), channels, height * width);
    CpuMatrix outValue(
        localOutV.subMatrix(sample, 1)->getData(), channels, height * width);
    CpuMatrix sampleDenoms(
        denoms.subMatrix(sample, 1)->getData(), channels, height * width);

    for (int ch = 0; ch < numChannels; ++ch) {
      // Direct term for this channel.
      scratch.pow2(*sampleDenoms.subMatrix(ch, 1), -pow);
      inGrad.subMatrix(ch, 1)
          ->addDotMul(scratch, *outGrad.subMatrix(ch, 1), (real)1, (real)1);

      // Correction terms from the neighbouring channels.
      for (int neighbor = ch + windowBegin; neighbor < ch + windowEnd;
           ++neighbor) {
        // Skip window positions that fall outside the channel range.
        if (neighbor < 0 || neighbor >= numChannels) {
          continue;
        }
        scratch.dotMul(*outGrad.subMatrix(neighbor, 1),
                       *outValue.subMatrix(neighbor, 1));
        scratch.mulScalar(ratio);
        scratch.dotDiv(scratch, *sampleDenoms.subMatrix(neighbor, 1));
        scratch.dotMul(*inValue.subMatrix(ch, 1));
        inGrad.subMatrix(ch, 1)->add(scratch);
      }
    }
  }
}
/**
* Input: one or more sequences. Each sequence contains some instances.
* Output: output size is the number of input sequences (NOT input instances).
......
......@@ -952,31 +952,6 @@ public:
LOG(FATAL) << "Not implemeted";
}
/// Cross-map normalization, forward pass.
///
/// Placeholder implementation: the body only logs a fatal message, so a
/// class that supports this operation has to provide its own definition.
virtual void crossMapNormalFwd(Matrix& input,
size_t imgSizeH,
size_t imgSizeW,
Matrix& denoms,
size_t channels,
size_t sizeX,
float scale,
float pow) {
LOG(FATAL) << "Not implemeted";
}
/// Cross-map normalization, backward pass.
///
/// Placeholder implementation: the body only logs a fatal message, so a
/// class that supports this operation has to provide its own definition.
virtual void crossMapNormalBwd(Matrix& localGrad,
Matrix& denoms,
Matrix& preOutV,
Matrix& localOutV,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t size,
float scale,
float pow) {
LOG(FATAL) << "Not implemeted";
}
/**
* Input: one or more sequences. Each sequence contains some instances.
*
......@@ -1459,26 +1434,6 @@ public:
size_t paddingH,
size_t paddingW);
/// Cross-map normalization, forward pass.
/// NOTE(review): the enclosing class is outside this hunk; the parameter
/// list matches the GpuMatrix/CpuMatrix definitions -- confirm which one.
void crossMapNormalFwd(Matrix& input,
size_t imgSizeH,
size_t imgSizeW,
Matrix& denoms,
size_t channels,
size_t sizeX,
float scale,
float pow);
/// Cross-map normalization, backward pass.
/// NOTE(review): the enclosing class is outside this hunk; the parameter
/// list matches the GpuMatrix/CpuMatrix definitions -- confirm which one.
void crossMapNormalBwd(Matrix& localGrad,
Matrix& denoms,
Matrix& preOutV,
Matrix& localOutV,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t sizeX,
float scale,
float pow);
void maxSequenceForward(Matrix& input,
const IVector& sequence,
IVector& index);
......@@ -1685,26 +1640,6 @@ public:
size_t paddingH,
size_t paddingW);
/// Cross-map normalization, forward pass.
/// NOTE(review): the enclosing class is outside this hunk; the parameter
/// list matches the GpuMatrix/CpuMatrix definitions -- confirm which one.
void crossMapNormalFwd(Matrix& input,
size_t imgSizeH,
size_t imgSizeW,
Matrix& denoms,
size_t channels,
size_t sizeX,
float scale,
float pow);
/// Cross-map normalization, backward pass.
/// NOTE(review): the enclosing class is outside this hunk; the parameter
/// list matches the GpuMatrix/CpuMatrix definitions -- confirm which one.
void crossMapNormalBwd(Matrix& localGrad,
Matrix& denoms,
Matrix& preOutV,
Matrix& localOutV,
size_t channels,
size_t imgSizeH,
size_t imgSizeW,
size_t sizeX,
float scale,
float pow);
void maxSequenceForward(Matrix& input,
const IVector& sequence,
IVector& index);
......
......@@ -1385,33 +1385,6 @@ void testCrossMapNormalBwd(
Tensor(denomsGpu.getData(), dims)},
{Tensor(inputsGradGpu.getData(), dims)},
{});
#if 0
CrossMapNormalGrad<DEVICE_TYPE_CPU> cpuCross;
cpuCross(inputsGrad,
inputsValue,
outputsGrad,
outputsValue,
denoms,
channels,
imgSizeH,
imgSizeW,
sizeX,
scale,
pow);
CrossMapNormalGrad<DEVICE_TYPE_GPU> gpuCross;
gpuCross(inputsGradGpu,
inputsValueGpu,
outputsGradGpu,
outputsValueGpu,
denomsGpu,
channels,
imgSizeH,
imgSizeW,
sizeX,
scale,
pow);
#endif
TensorCheckErr(inputsGrad, inputsGradGpu);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册