“c1c5ed644428acc8395ea9ea28518a1c66d790b3”上不存在“...unarchived/neural_machine_translation/transformer/_ce.py”
提交 52ceeedb 编写于 作者: C chengduoZH

Add col2vol and vol2col CPU funtion

上级 23cf0c61
...@@ -1389,6 +1389,52 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { ...@@ -1389,6 +1389,52 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
output_d, grad_d, mat_d, height_, width_); output_d, grad_d, mat_d, height_, width_);
} }
void GpuMatrix::vol2Col(real* data,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW) {
hl_matrix_vol2Col(data,
channels, depth, height, width,
filterD, filterH, filterW,
strideD, strideH, strideW,
paddingD, paddingH, paddingW, getData());
}
void GpuMatrix::col2Vol(real* trg,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
real alpha,
real beta) {
hl_matrix_col2Vol(trg,
channels, depth, height, width,
filterD, filterH, filterW,
strideD, strideH, strideW,
paddingD, paddingH, paddingW,
getData(),
alpha, beta);
}
/** /**
* CpuMatrix * CpuMatrix
*/ */
...@@ -3975,6 +4021,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out, ...@@ -3975,6 +4021,95 @@ void CpuMatrix::bilinearBackward(const Matrix& out,
} }
} }
void CpuMatrix::vol2Col(real* data,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW) {
real* outData = getData();
int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
int outDepth = (depth + 2 * paddingD - filterD) / strideD + 1;
int channelsCol = channels * filterD * filterH * filterW;
for (int c = 0; c < channelsCol; ++c) {
int wOffset = c % filterW;
int hOffset = (c / filterW) % filterH;
int dOffset = (c / filterW / filterH) % filterD;
int cIn = c / filterW / filterH / filterD;
for (int d = 0; d < outDepth; ++d) {
for (int h = 0; h < outHeight; ++h) {
for (int w = 0; w < outWidth; ++w) {
int dPad = d * strideD - paddingD + dOffset;
int hPad = h * strideH - paddingH + hOffset;
int wPad = w * strideW - paddingW + wOffset;
if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width &&
dPad >= 0 && dPad < depth)
outData[((c * outDepth + d) * outHeight + h) * outWidth + w] =
data[((cIn * depth + dPad) * height + hPad) * width + wPad];
else
outData[((c * outDepth + d) * outHeight + h) * outWidth + w] = 0;
}
}
}
}
}
void CpuMatrix::col2Vol(real* trg,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
real alpha,
real beta) {
real* src = getData();
int outDepth = (depth + 2 * paddingH - filterD) / strideD + 1;
int outHeight = (height + 2 * paddingH - filterH) / strideH + 1;
int outWidth = (width + 2 * paddingW - filterW) / strideW + 1;
int channelsCol = channels * filterD * filterH * filterW;
for (int c = 0; c < channelsCol; ++c) {
int wOffset = c % filterW;
int hOffset = (c / filterW) % filterH;
int dOffset = (c / filterW / filterH) % filterD;
int cIm = c / filterW / filterH / filterD;
for (int d = 0; d < outDepth; ++d) {
for (int h = 0; h < outHeight; ++h) {
for (int w = 0; w < outWidth; ++w) {
int dPad = d * strideD - paddingD + dOffset;
int hPad = h * strideH - paddingH + hOffset;
int wPad = w * strideW - paddingW + wOffset;
if (hPad >= 0 && hPad < height && wPad >= 0 && wPad < width &&
dPad >= 0 && dPad < depth)
trg[((cIm * depth + dPad) * height + hPad) * width + wPad] =
alpha *
src[((c * outDepth + d) * outHeight + h) * outWidth + w] +
beta *
trg[((cIm * depth + dPad) * height + hPad) * width + wPad];
}
}
}
}
}
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// functions executed via cpu // // functions executed via cpu //
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
......
...@@ -1039,6 +1039,42 @@ public: ...@@ -1039,6 +1039,42 @@ public:
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
} }
virtual void vol2Col(real* data,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW) {
LOG(FATAL) << "Not implemeted";
}
virtual void col2Vol(real* trg,
int channels,
int depth,
int height,
int width,
int filterD,
int filterH,
int filterW,
int strideD,
int strideH,
int strideW,
int paddingD,
int paddingH,
int paddingW,
real alpha,
real beta) {
LOG(FATAL) << "Not implemeted";
}
virtual void bilinearForward(const Matrix& in, virtual void bilinearForward(const Matrix& in,
const size_t inImgH, const size_t inImgH,
const size_t inImgW, const size_t inImgW,
...@@ -1374,6 +1410,20 @@ public: ...@@ -1374,6 +1410,20 @@ public:
const real ratioH, const real ratioH,
const real ratioW); const real ratioW);
void vol2Col(real* data,
int channels,
int depth, int height, int width,
int filterD, int filterH, int filterW,
int strideD, int strideH, int strideW,
int paddingD, int paddingH, int paddingW);
void col2Vol(real* trg,
int channels, int depth, int height, int width,
int filterD, int filterH, int filterW,
int strideD, int strideH, int strideW,
int paddingD, int paddingH, int paddingW,
real alpha, real beta);
void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label);
void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label);
...@@ -1715,6 +1765,20 @@ public: ...@@ -1715,6 +1765,20 @@ public:
const real ratioH, const real ratioH,
const real ratioW); const real ratioW);
void vol2Col(real* data,
int channels,
int depth, int height, int width,
int filterD, int filterH, int filterW,
int strideD, int strideH, int strideW,
int paddingD, int paddingH, int paddingW);
void col2Vol(real* trg,
int channels, int depth, int height, int width,
int filterD, int filterH, int filterW,
int strideD, int strideH, int strideW,
int paddingD, int paddingH, int paddingW,
real alpha, real beta);
template <typename ExpressionType> template <typename ExpressionType>
void operator=(const ExpressionType& expr) { void operator=(const ExpressionType& expr) {
TensorCpuApply<real>(*this, expr); TensorCpuApply<real>(*this, expr);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册