PaddlePaddle/Paddle · Commit cdac60f6

Authored Nov 01, 2016 by qijun
add SpatialPyramidPoolLayer c++ support
Parent: 46bd5f53
Showing 11 changed files with 562 additions and 182 deletions (+562 −182)
paddle/cuda/include/hl_cnn.h                         +10  −4
paddle/cuda/include/stub/hl_cnn_stub.h                +6  −4
paddle/cuda/src/hl_cuda_cnn.cu                       +23  −17
paddle/gserver/layers/PoolProjection.cpp             +81  −0
paddle/gserver/layers/PoolProjection.h               +72  −0
paddle/gserver/layers/Projection.h                    +9  −4
paddle/gserver/layers/SpatialPyramidPoolLayer.cpp   +128  −0
paddle/gserver/layers/SpatialPyramidPoolLayer.h      +54  −0
paddle/gserver/tests/test_LayerGrad.cpp              +29  −3
paddle/math/Matrix.cpp                              +138  −150
proto/ModelConfig.proto.m4                           +12  −0
paddle/cuda/include/hl_cnn.h

@@ -91,6 +91,7 @@ extern void hl_expand_feature2col(
  * @param[in] paddingH padding height.
  * @param[in] paddingW padding width.
  * @param[out] tgtData output data.
+ * @param[in] tgtStride output data stride.
  *
  */
 extern void hl_maxpool_forward(
@@ -100,7 +101,8 @@ extern void hl_maxpool_forward(
     const int pooledH, const int pooledW,
     const int sizeX, const int sizeY,
     const int strideH, const int strideW,
-    const int paddingH, const int paddingW,
-    real* tgtData);
+    const int paddingH, const int paddingW,
+    real* tgtData,
+    const int tgtStride);

 /**
  * @brief Maximum pool backward.
@@ -123,6 +125,7 @@ extern void hl_maxpool_forward(
  * @param[in] paddingH padding height.
  * @param[in] paddingW padding width.
  * @param[out] targetGrad output grad.
+ * @param[in] outStride output grad data stride.
  *
  */
 extern void hl_maxpool_backward(
@@ -135,7 +138,7 @@ extern void hl_maxpool_backward(
     const int strideH, const int strideW,
     const int paddingH, const int paddingW,
     real scaleA, real scaleB,
-    real* targetGrad);
+    real* targetGrad, const int outStride);

 /**
  * @brief Averge pool forward.
@@ -154,6 +157,7 @@ extern void hl_maxpool_backward(
  * @param[in] paddingH padding height.
  * @param[in] paddingW padding width.
  * @param[out] tgtData output data.
+ * @param[in] tgtStride output data stride.
  *
  */
 extern void hl_avgpool_forward(
@@ -163,7 +167,8 @@ extern void hl_avgpool_forward(
     const int pooledH, const int pooledW,
     const int sizeX, const int sizeY,
     const int strideH, const int strideW,
-    const int paddingH, const int paddingW,
-    real* tgtData);
+    const int paddingH, const int paddingW,
+    real* tgtData,
+    const int tgtStride);

 /**
  * @brief Maximum pool backward.
@@ -184,6 +189,7 @@ extern void hl_avgpool_forward(
  * @param[in] scaleA scale.
  * @param[in] scaleB scale.
  * @param[out] backGrad output grad.
+ * @param[in] outStride output grad data stride.
  *
  */
 extern void hl_avgpool_backward(
@@ -195,7 +201,7 @@ extern void hl_avgpool_backward(
     const int strideH, const int strideW,
     int paddingH, int paddingW,
     real scaleA, real scaleB,
-    real* backGrad);
+    real* backGrad, const int outStride);

 /**
  * @brief Cross-map-respose normalize forward.
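Note on the new stride parameters (not part of the commit; a minimal standalone sketch). tgtStride/outStride let the pooling routines write each sample's pooled block into an output matrix whose row stride is wider than the block itself, for example a sub-column view of a concatenated output. The layout assumption, with illustrative numbers:

#include <cstdio>

int main() {
  // One sample's pooled block occupies blockWidth elements, but consecutive
  // samples in the enclosing output matrix are rowStride elements apart.
  const int blockWidth = 4;   // pooledH * pooledW * channels for one level
  const int rowStride = 10;   // full row width of the concatenated output
  for (int sample = 0; sample < 3; ++sample) {
    int denseOffset = sample * blockWidth;   // old, stride-free indexing
    int stridedOffset = sample * rowStride;  // new, stride-aware indexing
    std::printf("sample %d: dense %d vs strided %d\n",
                sample, denseOffset, stridedOffset);
  }
  return 0;
}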
paddle/cuda/include/stub/hl_cnn_stub.h

@@ -44,7 +44,8 @@ inline void hl_maxpool_forward(
     const int pooledH, const int pooledW,
     const int sizeX, const int sizeY,
     const int strideH, const int strideW,
-    const int paddingH, const int paddingW,
-    real* tgtData) {}
+    const int paddingH, const int paddingW,
+    real* tgtData,
+    const int tgtStride) {}

 inline void hl_maxpool_backward(
     const int frameCnt, const real* inputData,
@@ -56,7 +57,7 @@ inline void hl_maxpool_backward(
     const int strideH, const int strideW,
     const int paddingH, const int paddingW,
     real scaleA, real scaleB,
-    real* targetGrad) {}
+    real* targetGrad, const int outStride) {}

 inline void hl_avgpool_forward(
     const int frameCnt, const real* inputData,
@@ -65,7 +66,8 @@ inline void hl_avgpool_forward(
     const int pooledH, const int pooledW,
     const int sizeX, const int sizeY,
     const int strideH, const int strideW,
-    const int paddingH, const int paddingW,
-    real* tgtData) {}
+    const int paddingH, const int paddingW,
+    real* tgtData,
+    const int tgtStride) {}

 inline void hl_avgpool_backward(
     const int frameCnt, const real* outGrad,
@@ -76,7 +78,7 @@ inline void hl_avgpool_backward(
     const int strideH, const int strideW,
     int paddingH, int paddingW,
     real scaleA, real scaleB,
-    real* backGrad) {}
+    real* backGrad, const int outStride) {}

 inline void hl_CMRNorm_forward(
     size_t frameCnt, const real* in, real* scale, real* out,
paddle/cuda/src/hl_cuda_cnn.cu

@@ -152,7 +152,7 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
     const int ksizeW, const int ksizeH,
     const int strideH, const int strideW,
     const int offsetH, const int offsetW,
-    real* tgtData) {
+    real* tgtData, const int tgtStride) {
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < nthreads) {
     int pw = index % pooledW;
@@ -173,7 +173,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
         maxval = inputData[h * width + w];
       }
     }
-    tgtData[index] = maxval;
+    int tgtIndex = index % (pooledW * pooledH * channels) +
+                   frameNum * tgtStride;
+    tgtData[tgtIndex] = maxval;
   }
 }
@@ -184,7 +186,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
     const int sizeX, const int sizeY,
     const int strideH, const int strideW,
     const int paddingH, const int paddingW,
-    real* tgtData) {
+    real* tgtData, const int tgtStride) {
   int num_kernels = pooledH * pooledW * channels * frameCnt;
   int blocks = (num_kernels + 1024 - 1) / 1024;
@@ -194,7 +196,7 @@ void hl_maxpool_forward(const int frameCnt, const real* inputData,
   KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>>
       (num_kernels, inputData, channels, height, width,
        pooledH, pooledW, sizeX, sizeY, strideH, strideW,
-       paddingH, paddingW, tgtData);
+       paddingH, paddingW, tgtData, tgtStride);
   CHECK_SYNC("hl_maxpool_forward failed");
 }
@@ -207,7 +209,7 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
     const int strideH, const int strideW,
     const int padH, const int padW,
     real scaleA, real scaleB,
-    real* targetGrad) {
+    real* targetGrad, const int outStride) {
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < nthreads) {
     // find out the local index
@@ -223,8 +225,8 @@ __global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
     int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
     real gradient = 0;
     real input = inputData[index];
-    outData += (frameNum * channels + offsetC) * pooledH * pooledW;
-    outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
+    outData += (frameNum * outStride + offsetC * pooledH * pooledW);
+    outGrad += (frameNum * outStride + offsetC * pooledH * pooledW);
     for (int ph = phstart; ph < phend; ++ph) {
       for (int pw = pwstart; pw < pwend; ++pw) {
         if (input == outData[ph * pooledW + pw]) {
@@ -246,7 +248,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
     const int strideH, const int strideW,
     const int paddingH, const int paddingW,
     real scaleA, real scaleB,
-    real* targetGrad) {
+    real* targetGrad, const int outStride) {
   int num_kernels = height * width * channels * frameCnt;
   int blocks = (num_kernels + 1024 - 1) / 1024;
@@ -257,7 +259,7 @@ void hl_maxpool_backward(const int frameCnt, const real* inputData,
        strideH, strideW,
        paddingH, paddingW,
        scaleA, scaleB,
-       targetGrad);
+       targetGrad, outStride);
   CHECK_SYNC("hl_maxpool_backward");
 }
@@ -268,7 +270,7 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
     const int sizeX, const int sizeY,
     const int strideH, const int strideW,
     const int padH, const int padW,
-    real* tgtData) {
+    real* tgtData, const int tgtStride) {
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < nthreads) {
     int pw = index % pooledW;
@@ -293,7 +295,9 @@ __global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
         aveval += inputData[h * width + w];
       }
     }
-    tgtData[index] = aveval / pool_size;
+    int tgtIndex = index % (pooledW * pooledH * channels) +
+                   frameNum * tgtStride;
+    tgtData[tgtIndex] = aveval / pool_size;
   }
 }
@@ -303,14 +307,15 @@ void hl_avgpool_forward(const int frameCnt, const real* inputData,
     const int pooledH, const int pooledW,
     const int sizeX, const int sizeY,
     const int strideH, const int strideW,
-    const int paddingH, const int paddingW,
-    real* tgtData) {
+    const int paddingH, const int paddingW,
+    real* tgtData,
+    const int tgtStride) {
   int num_kernels = pooledH * pooledW * channels * frameCnt;
   int blocks = (num_kernels + 1024 - 1) / 1024;
   KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>>
       (num_kernels, inputData, channels,
        height, width, pooledH, pooledW,
        sizeX, sizeY, strideH, strideW,
-       paddingH, paddingW, tgtData);
+       paddingH, paddingW, tgtData, tgtStride);
   CHECK_SYNC("hl_avgpool_forward failed");
 }
@@ -322,7 +327,7 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
     const int strideH, const int strideW,
     const int padH, const int padW,
     real scaleA, real scaleB,
-    real* tgtGrad) {
+    real* tgtGrad, const int outStride) {
   int index = blockIdx.x * blockDim.x + threadIdx.x;
   if (index < nthreads) {
     int offsetW = index % width + padW;
@@ -335,7 +340,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
     int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0;
     int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
     real gradient = 0;
-    outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
+    outGrad += (frameNum * outStride +
+                offsetC * pooledH * pooledW);
     for (int ph = phstart; ph < phend; ++ph) {
       for (int pw = pwstart; pw < pwend; ++pw) {
@@ -360,7 +366,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
     const int strideH, const int strideW,
     const int paddingH, const int paddingW,
     real scaleA, real scaleB,
-    real* backGrad) {
+    real* backGrad, const int outStride) {
   int num_kernels = height * width * channels * frameCnt;
   int blocks = (num_kernels + 1024 - 1) / 1024;
@@ -370,7 +376,7 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
        strideH, strideW,
        paddingH, paddingW,
        scaleA, scaleB,
-       backGrad);
+       backGrad, outStride);
   CHECK_SYNC("hl_avgpool_backward failed");
 }
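The kernels now derive a strided target index instead of writing tgtData[index] directly. The arithmetic, lifted into a standalone check (illustrative, not Paddle code; frameNum is assumed recoverable from the flat index the same way the kernels compute it):

#include <cassert>

// Mirrors the new index math in KeMaxPoolForward/KeAvgPoolForward:
// tgtIndex = index % (pooledW * pooledH * channels) + frameNum * tgtStride.
int tgtIndexFor(int index, int pooledW, int pooledH, int channels,
                int tgtStride) {
  int block = pooledW * pooledH * channels;
  int frameNum = index / block;  // which sample this thread belongs to
  return index % block + frameNum * tgtStride;
}

int main() {
  const int pooledW = 2, pooledH = 2, channels = 3;  // block of 12 values
  const int block = pooledW * pooledH * channels;
  // With tgtStride == block, the new formula reduces to the old tgtData[index].
  for (int index = 0; index < 3 * block; ++index) {
    assert(tgtIndexFor(index, pooledW, pooledH, channels, block) == index);
  }
  // With a wider stride, each sample's block starts tgtStride elements apart.
  assert(tgtIndexFor(block, pooledW, pooledH, channels, 20) == 20);
  return 0;
}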
paddle/gserver/layers/PoolProjection.cpp (new file, 100644)

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "PoolProjection.h"

namespace paddle {

REGISTER_PROJECTION_CREATE_FUNC(pool2, &PoolProjection::create);

PoolProjection* PoolProjection::create(const ProjectionConfig& config,
                                       ParameterPtr parameter, bool useGpu) {
  const std::string& pool = config.pool_conf().pool_type();
  if (pool == "max") {
    return new MaxPoolProjection(config, parameter, useGpu);
  } else if (pool == "avg") {
    return new AvgPoolProjection(config, parameter, useGpu);
  } else {
    LOG(FATAL) << "Unknown pool type: " << pool;
    return nullptr;
  }
}

void MaxPoolProjection::forward() {
  MatrixPtr inputV = in_->value;
  MatrixPtr outV = out_->value;
  outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
                       sizeX_, sizeY_, strideY_, stride_,
                       outputY_, outputX_, confPaddingY_, confPadding_);
}

void MaxPoolProjection::backward(const UpdateCallback& callback) {
  (void)callback;
  MatrixPtr outGrad = out_->grad;
  MatrixPtr inputV = in_->value;
  MatrixPtr outV = out_->value;
  MatrixPtr inputGrad = in_->grad;
  if (NULL == inputGrad) {
    return;
  }
  inputGrad->maxPoolBackward(*inputV, imgSizeY_, imgSize_, *outGrad, *outV,
                             sizeX_, sizeY_, strideY_, stride_,
                             outputY_, outputX_, 1, 1,
                             confPaddingY_, confPadding_);
}

void AvgPoolProjection::forward() {
  MatrixPtr inputV = in_->value;
  MatrixPtr outV = out_->value;
  outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
                       sizeX_, sizeY_, strideY_, stride_,
                       outputY_, outputX_, confPaddingY_, confPadding_);
}

void AvgPoolProjection::backward(const UpdateCallback& callback) {
  (void)callback;
  MatrixPtr outputGrad = out_->grad;
  MatrixPtr inputGrad = in_->grad;
  if (NULL == inputGrad) {
    return;
  }
  inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_,
                             sizeX_, sizeY_, strideY_, stride_,
                             outputY_, outputX_, 1, 1,
                             confPaddingY_, confPadding_);
}
}  // namespace paddle
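PoolProjection::create is a small string-keyed factory over pool_conf().pool_type(), registered for the "pool2" projection type via the REGISTER_PROJECTION_CREATE_FUNC macro added in Projection.h below. A stripped-down standalone sketch of the same dispatch shape (illustrative types, not Paddle's):

#include <memory>
#include <stdexcept>
#include <string>

struct Pool { virtual ~Pool() = default; };
struct MaxPool : Pool {};
struct AvgPool : Pool {};

// Same shape as PoolProjection::create: branch on the configured pool type
// and construct the matching subclass (a fatal log becomes a throw here).
std::unique_ptr<Pool> createPool(const std::string& poolType) {
  if (poolType == "max") return std::make_unique<MaxPool>();
  if (poolType == "avg") return std::make_unique<AvgPool>();
  throw std::runtime_error("Unknown pool type: " + poolType);
}

int main() {
  auto p = createPool("max");  // dispatches to MaxPool
  return p ? 0 : 1;
}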
paddle/gserver/layers/PoolProjection.h (new file, 100644)

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Projection.h"

namespace paddle {

class PoolProjection : public Projection {
protected:
  size_t imgSizeY_, imgSize_;
  size_t outputY_, outputX_;
  size_t strideY_, stride_;
  size_t sizeY_, sizeX_;
  int confPaddingY_, confPadding_;
  size_t channels_;
  std::string poolType_;

public:
  PoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
                 bool useGpu)
      : Projection(config, parameter, useGpu) {
    const PoolConfig& conf = config_.pool_conf();
    poolType_ = conf.pool_type();
    channels_ = conf.channels();
    sizeX_ = conf.size_x();
    stride_ = conf.stride();
    outputX_ = conf.output_x();
    imgSize_ = conf.img_size();
    confPadding_ = conf.padding();

    sizeY_ = conf.has_size_y() ? conf.size_y() : conf.size_x();
    imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
    strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride();
    confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
    outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
  }

  static PoolProjection* create(const ProjectionConfig& config,
                                ParameterPtr parameter, bool useGpu);
  const std::string& getPoolType() const { return poolType_; }
};

class MaxPoolProjection : public PoolProjection {
public:
  MaxPoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
                    bool useGpu)
      : PoolProjection(config, parameter, useGpu) {}
  virtual void forward();
  virtual void backward(const UpdateCallback& callback = nullptr);
};

class AvgPoolProjection : public PoolProjection {
public:
  AvgPoolProjection(const ProjectionConfig& config, ParameterPtr parameter,
                    bool useGpu)
      : PoolProjection(config, parameter, useGpu) {}
  virtual void forward();
  virtual void backward(const UpdateCallback& callback = nullptr);
};
}  // namespace paddle
paddle/gserver/layers/Projection.h

@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
-#include "paddle/parameter/Parameter.h"
-#include "ModelConfig.pb.h"
 #include "Layer.h"
+#include "ModelConfig.pb.h"
+#include "paddle/parameter/Parameter.h"

 namespace paddle {
@@ -28,6 +27,11 @@ namespace paddle {
     Projection::registrar_.registerClass<__class_name>(#__type_name); \
   })

+#define REGISTER_PROJECTION_CREATE_FUNC(__type_name, createFunction) \
+  static InitFunction __reg_type_##__type_name([]() { \
+    Projection::registrar_.registerClass(#__type_name, createFunction); \
+  })
+
 /**
  * A projection takes one Argument as input, calculate the result and add it
  * to output Argument.
@@ -50,7 +54,8 @@ public:
       registrar_;

   /**
-   * Forward propagation. If backward() will be called, in and out must be kept valid until then.
+   * Forward propagation. If backward() will be called, in and out must be kept
+   * valid until then.
    * @param in input of projection
    * @param out output of projection
    * @param passType PASS_TRAIN of PASS_TEST
paddle/gserver/layers/SpatialPyramidPoolLayer.cpp (new file, 100644)

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "SpatialPyramidPoolLayer.h"

namespace paddle {

REGISTER_LAYER(spp, SpatialPyramidPoolLayer);

ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
                                                    size_t imgSizeH,
                                                    size_t channels,
                                                    size_t pyramidLevel,
                                                    std::string& poolType) {
  ProjectionConfig config;
  config.set_type("pool2");
  PoolConfig* conf = config.mutable_pool_conf();
  conf->set_channels(channels);
  conf->set_img_size(imgSizeW);
  conf->set_img_size_y(imgSizeH);
  conf->set_pool_type(poolType);

  int numBins = std::pow(2, pyramidLevel);

  int sizeH = std::ceil(imgSizeH / static_cast<double>(numBins));
  int remainderH = sizeH * numBins - imgSizeH;
  int paddingH = (remainderH + 1) / 2;
  int outSizeH = outputSize(imgSizeH, sizeH, paddingH, sizeH);

  int sizeW = std::ceil(imgSizeW / static_cast<double>(numBins));
  int remainderW = sizeW * numBins - imgSizeW;
  int paddingW = (remainderW + 1) / 2;
  int outSizeW = outputSize(imgSizeW, sizeW, paddingW, sizeW);

  conf->set_stride(sizeW);
  conf->set_stride_y(sizeH);
  conf->set_size_x(sizeW);
  conf->set_size_y(sizeH);
  conf->set_padding(paddingW);
  conf->set_padding_y(paddingH);
  conf->set_output_x(outSizeW);
  conf->set_output_y(outSizeH);
  config.set_output_size(outSizeH * outSizeW * channels);
  return config;
}

void SpatialPyramidPoolLayer::splitInput(Argument& input, size_t height,
                                         size_t width, bool useGpu) {
  input.value = getInput(0).value;
  if (passType_ != PASS_TEST && needGradient()) {
    Matrix::resizeOrCreate(input.grad, height, width, /* trans */ false,
                           useGpu);
    input.grad->zeroMem();
  }
}

bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
                                   const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);
  CHECK_EQ(config_.inputs_size(), 1);

  const SppConfig& sppConf = config_.inputs(0).spp_conf();
  pyramidHeight_ = sppConf.pyramid_height();
  poolType_ = sppConf.pool_type();

  channels_ = sppConf.channels();
  imgSizeW_ = sppConf.img_size();
  imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
  poolProjections_.reserve(pyramidHeight_);
  projCol_.reserve(pyramidHeight_);
  projInput_.reserve(pyramidHeight_);
  projOutput_.resize(pyramidHeight_);

  size_t startCol = 0;
  size_t endCol = 0;
  for (size_t i = 0; i < pyramidHeight_; i++) {
    poolProjections_.emplace_back(PoolProjection::create(
        getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_),
        nullptr, useGpu_));
    endCol += poolProjections_[i]->getOutputSize();
    projCol_.push_back(std::make_pair(startCol, endCol));
    startCol = endCol;
    projInput_.emplace_back(Argument());
  }
  outputSize_ = endCol;
  return true;
}

void SpatialPyramidPoolLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  resetOutput(batchSize, outputSize_);
  for (size_t i = 0; i < pyramidHeight_; i++) {
    size_t startCol = projCol_[i].first;
    size_t endCol = projCol_[i].second;
    projOutput_[i].value = output_.value->subColMatrix(startCol, endCol);
    projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol);
    splitInput(projInput_[i], getInput(0).value->getHeight(),
               getInput(0).value->getWidth(), useGpu_);
  }
  for (size_t i = 0; i < pyramidHeight_; i++) {
    poolProjections_[i]->forward(&projInput_[i], &projOutput_[i], passType);
  }
}

void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) {
  for (size_t i = 0; i < pyramidHeight_; i++) {
    if (poolProjections_[i]) {
      poolProjections_[i]->backward(callback);
      getInput(0).grad->add(*projInput_[i].grad);
    }
  }
}
}  // namespace paddle
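For pyramid level l, getConfig partitions the image into a 2^l × 2^l grid: window and stride are ceil(dim / 2^l) and the padding absorbs the rounding remainder, so every level covers the whole image. Rerunning that arithmetic standalone for the 10 × 20 image used in the new unit test (illustrative, not Paddle code):

#include <cmath>
#include <cstdio>

// outputSize as defined in SpatialPyramidPoolLayer.h.
int outputSize(int imageSize, int windowSize, int padding, int stride) {
  return (imageSize - windowSize + 2 * padding) / stride + 1;
}

int main() {
  const int imgSizeW = 10, imgSizeH = 20;  // matches the new unit test
  for (int level = 0; level < 3; ++level) {
    int numBins = 1 << level;  // std::pow(2, pyramidLevel)
    int sizeH = static_cast<int>(std::ceil(imgSizeH / double(numBins)));
    int padH = (sizeH * numBins - imgSizeH + 1) / 2;
    int sizeW = static_cast<int>(std::ceil(imgSizeW / double(numBins)));
    int padW = (sizeW * numBins - imgSizeW + 1) / 2;
    std::printf("level %d: %dx%d bins, window %dx%d, out %dx%d\n", level,
                numBins, numBins, sizeH, sizeW,
                outputSize(imgSizeH, sizeH, padH, sizeH),
                outputSize(imgSizeW, sizeW, padW, sizeW));
  }
  return 0;
}

For these dimensions each level comes out to exactly 2^l × 2^l pooled values per channel (1×1, 2×2, 4×4).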
paddle/gserver/layers/SpatialPyramidPoolLayer.h (new file, 100644)

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Layer.h"
#include "PoolProjection.h"
#include "paddle/utils/Logging.h"

namespace paddle {

class SpatialPyramidPoolLayer : public Layer {
protected:
  size_t channels_;
  size_t imgSizeW_;
  size_t imgSizeH_;
  size_t pyramidHeight_;
  size_t outputSize_;
  std::string poolType_;

  std::vector<std::unique_ptr<PoolProjection>> poolProjections_;
  std::vector<Argument> projInput_;
  std::vector<Argument> projOutput_;
  std::vector<std::pair<size_t, size_t>> projCol_;

public:
  explicit SpatialPyramidPoolLayer(const LayerConfig& config)
      : Layer(config) {}

  ~SpatialPyramidPoolLayer() {}

  virtual bool init(const LayerMap& layerMap,
                    const ParameterMap& parameterMap);

  ProjectionConfig getConfig(size_t sizeX_, size_t sizeY_, size_t channels,
                             size_t pyamidLevel_, std::string& poolType_);
  int outputSize(int imageSize, int windowSize, int padding, int stride) {
    return (imageSize - windowSize + 2 * padding) / stride + 1;
  }

  virtual void forward(PassType passType);
  virtual void backward(const UpdateCallback& callback = nullptr);
  void splitInput(Argument& input, size_t height, size_t width, bool useGpu);
};
}  // namespace paddle
paddle/gserver/tests/test_LayerGrad.cpp

@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include <gtest/gtest.h>
-#include <vector>
 #include <string>
-#include "paddle/gserver/layers/DataLayer.h"
+#include <vector>
 #include "ModelConfig.pb.h"
+#include "paddle/gserver/layers/DataLayer.h"
 #include "paddle/trainer/Trainer.h"
-#include "TestUtil.h"
 #include "LayerGradUtil.h"
+#include "TestUtil.h"

 using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
@@ -880,6 +880,32 @@ TEST(Layer, PoolLayer) {
 #endif
 }

+void testSppLayer(const string& poolType, const int pyramidHeight, bool trans,
+                  bool useGpu) {
+  TestConfig config;
+  config.layerConfig.set_type("spp");
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  SppConfig* sppConfig = input->mutable_spp_conf();
+  sppConfig->set_pool_type(poolType);
+  sppConfig->set_pyramid_height(pyramidHeight);
+  sppConfig->set_channels(16);
+  sppConfig->set_img_size(10);
+  sppConfig->set_img_size_y(20);
+  testLayerGrad(config, "spp", 100, trans, useGpu);
+}
+
+TEST(Layer, SpatialPyramidPoolLayer) {
+  for (auto useGpu : {false, true}) {
+    testSppLayer("avg", 1, false, useGpu);
+    testSppLayer("avg", 3, false, useGpu);
+    testSppLayer("avg", 5, false, useGpu);
+    testSppLayer("max", 1, false, useGpu);
+    testSppLayer("max", 3, false, useGpu);
+    testSppLayer("avg", 5, false, useGpu);
+  }
+}
+
 TEST(Layer, rankCostLayer) {
   TestConfig config;
   config.layerConfig.set_type("rank-cost");
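A quick sanity check on the test geometry (my arithmetic, not part of the commit): the data layer width 3200 is channels × img_size_y × img_size = 16 × 20 × 10, and for these dimensions each pyramid level l contributes 2^l × 2^l bins per channel, so pyramid_height 3 yields 16 × (1 + 4 + 16) = 336 output columns:

#include <cassert>

int main() {
  const int channels = 16, imgSizeX = 10, imgSizeY = 20;
  assert(channels * imgSizeY * imgSizeX == 3200);  // INPUT_DATA width in test

  // Output columns for pyramid_height = 3: level l pools to 4^l bins here.
  int cols = 0;
  for (int level = 0; level < 3; ++level) {
    cols += channels * (1 << (2 * level));
  }
  assert(cols == 16 * (1 + 4 + 16));  // 336
  return 0;
}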
paddle/math/Matrix.cpp
浏览文件 @
cdac60f6
...
@@ -13,19 +13,19 @@ See the License for the specific language governing permissions and
...
@@ -13,19 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "Matrix.h"
#include "Matrix.h"
#include "MathFunctions.h"
#include "SparseMatrix.h"
#include "SparseMatrix.h"
#include "SparseRowMatrix.h"
#include "SparseRowMatrix.h"
#include "MathFunctions.h"
#include <cmath>
#include <float.h>
#include <float.h>
#include <algorithm>
#include <algorithm>
#include <cmath>
#include "paddle/utils/Logging.h"
#include <string.h>
#include <string.h>
#include "hl_gpu.h"
#include "hl_gpu.h"
#include "hl_table_apply.h"
#include "hl_table_apply.h"
#include "hl_top_k.h"
#include "hl_top_k.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/ThreadLocal.h"
#include "paddle/utils/ThreadLocal.h"
...
@@ -42,9 +42,9 @@ inline real _safelog(real a) { return a > 0.0f ? std::log(a) : -40.0f; }
...
@@ -42,9 +42,9 @@ inline real _safelog(real a) { return a > 0.0f ? std::log(a) : -40.0f; }
Matrix
::
Matrix
(
MemoryHandlePtr
memHandle
,
size_t
height
,
size_t
width
,
Matrix
::
Matrix
(
MemoryHandlePtr
memHandle
,
size_t
height
,
size_t
width
,
bool
trans
,
bool
use_gpu
)
bool
trans
,
bool
use_gpu
)
:
BaseMatrix
(
:
BaseMatrix
(
height
,
width
,
height
,
width
,
memHandle
?
(
reinterpret_cast
<
real
*>
(
memHandle
->
getBuf
()))
:
nullptr
,
memHandle
?
(
reinterpret_cast
<
real
*>
(
memHandle
->
getBuf
()))
:
nullptr
,
trans
,
use_gpu
)
{
trans
,
use_gpu
)
{
elementCnt_
=
width
*
height
;
elementCnt_
=
width
*
height
;
memoryHandle_
=
memHandle
;
memoryHandle_
=
memHandle
;
}
}
...
@@ -95,7 +95,7 @@ MatrixPtr Matrix::create(MemoryHandlePtr memHandle, size_t height, size_t width,
...
@@ -95,7 +95,7 @@ MatrixPtr Matrix::create(MemoryHandlePtr memHandle, size_t height, size_t width,
if
(
auto
gpuHandle
=
std
::
dynamic_pointer_cast
<
GpuMemoryHandle
>
(
memHandle
))
{
if
(
auto
gpuHandle
=
std
::
dynamic_pointer_cast
<
GpuMemoryHandle
>
(
memHandle
))
{
return
std
::
make_shared
<
GpuMatrix
>
(
gpuHandle
,
height
,
width
,
trans
);
return
std
::
make_shared
<
GpuMatrix
>
(
gpuHandle
,
height
,
width
,
trans
);
}
else
if
(
auto
cpuHandle
=
}
else
if
(
auto
cpuHandle
=
std
::
dynamic_pointer_cast
<
CpuMemoryHandle
>
(
memHandle
))
{
std
::
dynamic_pointer_cast
<
CpuMemoryHandle
>
(
memHandle
))
{
return
std
::
make_shared
<
CpuMatrix
>
(
cpuHandle
,
height
,
width
,
trans
);
return
std
::
make_shared
<
CpuMatrix
>
(
cpuHandle
,
height
,
width
,
trans
);
}
else
{
}
else
{
LOG
(
FATAL
)
<<
"Wrong"
;
LOG
(
FATAL
)
<<
"Wrong"
;
...
@@ -343,19 +343,17 @@ void GpuMatrix::addBias(Matrix& b, real scale) {
...
@@ -343,19 +343,17 @@ void GpuMatrix::addBias(Matrix& b, real scale) {
void
GpuMatrix
::
collectBias
(
Matrix
&
a
,
real
scale
)
{
void
GpuMatrix
::
collectBias
(
Matrix
&
a
,
real
scale
)
{
CHECK_EQ
(
getHeight
(),
(
size_t
)
1
);
CHECK_EQ
(
getHeight
(),
(
size_t
)
1
);
CHECK_EQ
(
width_
,
a
.
getWidth
());
CHECK_EQ
(
width_
,
a
.
getWidth
());
GpuSparseMatrix
*
sMatPtr
=
dynamic_cast
<
GpuSparseMatrix
*>
(
&
a
);
GpuSparseMatrix
*
sMatPtr
=
dynamic_cast
<
GpuSparseMatrix
*>
(
&
a
);
if
(
!
sMatPtr
)
{
if
(
!
sMatPtr
)
{
sumCols
(
a
,
scale
);
sumCols
(
a
,
scale
);
}
else
{
}
else
{
real
*
data
=
getData
();
real
*
data
=
getData
();
hl_sparse_matrix_s
A_d
=
sMatPtr
->
sMatrix_
.
get
();
hl_sparse_matrix_s
A_d
=
sMatPtr
->
sMatrix_
.
get
();
hl_sparse_matrix_column_sum
(
data
,
A_d
,
sMatPtr
->
getHeight
(),
hl_sparse_matrix_column_sum
(
data
,
A_d
,
sMatPtr
->
getHeight
(),
width_
,
scale
);
width_
,
scale
);
}
}
}
}
void
GpuMatrix
::
sequenceAvgForward
(
Matrix
&
a
,
void
GpuMatrix
::
sequenceAvgForward
(
Matrix
&
a
,
const
IVector
&
startsPos
,
const
IVector
&
startsPos
,
int
mode
)
{
int
mode
)
{
size_t
height
=
getHeight
();
size_t
height
=
getHeight
();
size_t
width
=
getWidth
();
size_t
width
=
getWidth
();
...
@@ -401,8 +399,8 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB,
...
@@ -401,8 +399,8 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB,
hl_trans_op_t
transa
=
!
a
.
isTransposed
()
?
HPPL_OP_N
:
HPPL_OP_T
;
hl_trans_op_t
transa
=
!
a
.
isTransposed
()
?
HPPL_OP_N
:
HPPL_OP_T
;
hl_trans_op_t
transb
=
!
b
.
isTransposed
()
?
HPPL_OP_N
:
HPPL_OP_T
;
hl_trans_op_t
transb
=
!
b
.
isTransposed
()
?
HPPL_OP_N
:
HPPL_OP_T
;
hl_matrix_mul
(
A_d
,
transa
,
B_d
,
transb
,
C_d
,
dimM
,
dimN
,
dimK
,
hl_matrix_mul
(
A_d
,
transa
,
B_d
,
transb
,
C_d
,
dimM
,
dimN
,
dimK
,
scaleAB
,
scale
AB
,
scale
T
,
lda
,
ldb
,
ldc
);
scaleT
,
lda
,
ldb
,
ldc
);
}
}
void
GpuMatrix
::
mul
(
const
GpuSparseMatrix
&
a
,
const
GpuMatrix
&
b
,
real
scaleAB
,
void
GpuMatrix
::
mul
(
const
GpuSparseMatrix
&
a
,
const
GpuMatrix
&
b
,
real
scaleAB
,
...
@@ -423,8 +421,8 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, const GpuMatrix& b, real scaleAB,
...
@@ -423,8 +421,8 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, const GpuMatrix& b, real scaleAB,
hl_sparse_matrix_s
A_d
=
a
.
sMatrix_
.
get
();
hl_sparse_matrix_s
A_d
=
a
.
sMatrix_
.
get
();
real
*
B_d
=
b
.
data_
;
real
*
B_d
=
b
.
data_
;
real
*
C_d
=
data_
;
real
*
C_d
=
data_
;
hl_matrix_csr_mul_dense
(
A_d
,
transA
,
B_d
,
HPPL_OP_N
,
C_d
,
height_
,
hl_matrix_csr_mul_dense
(
A_d
,
transA
,
B_d
,
HPPL_OP_N
,
C_d
,
height_
,
width_
,
width_
,
b
.
height_
,
scaleAB
,
scaleT
);
b
.
height_
,
scaleAB
,
scaleT
);
}
}
void
GpuMatrix
::
mul
(
const
GpuMatrix
&
a
,
const
GpuSparseMatrix
&
b
,
real
scaleAB
,
void
GpuMatrix
::
mul
(
const
GpuMatrix
&
a
,
const
GpuSparseMatrix
&
b
,
real
scaleAB
,
...
@@ -445,11 +443,11 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuSparseMatrix& b, real scaleAB,
...
@@ -445,11 +443,11 @@ void GpuMatrix::mul(const GpuMatrix& a, const GpuSparseMatrix& b, real scaleAB,
<<
"Matrix dimensions are not equal"
;
<<
"Matrix dimensions are not equal"
;
}
}
if
(
b
.
format_
==
SPARSE_CSC
)
{
if
(
b
.
format_
==
SPARSE_CSC
)
{
hl_matrix_dense_mul_csc
(
A_d
,
HPPL_OP_N
,
B_d
,
transB
,
C_d
,
height_
,
hl_matrix_dense_mul_csc
(
A_d
,
HPPL_OP_N
,
B_d
,
transB
,
C_d
,
height_
,
width_
,
width_
,
a
.
width_
,
scaleAB
,
scaleT
);
a
.
width_
,
scaleAB
,
scaleT
);
}
else
{
}
else
{
hl_matrix_dense_mul_csr
(
A_d
,
HPPL_OP_N
,
B_d
,
transB
,
C_d
,
height_
,
hl_matrix_dense_mul_csr
(
A_d
,
HPPL_OP_N
,
B_d
,
transB
,
C_d
,
height_
,
width_
,
width_
,
a
.
width_
,
scaleAB
,
scaleT
);
a
.
width_
,
scaleAB
,
scaleT
);
}
}
}
}
...
@@ -511,8 +509,8 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
...
@@ -511,8 +509,8 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
size_t
tableSize
=
table
.
getHeight
();
size_t
tableSize
=
table
.
getHeight
();
int
*
index
=
ids
.
getData
();
int
*
index
=
ids
.
getData
();
hl_matrix_select_rows
(
a
,
stride_
,
table
.
getData
(),
table
.
stride_
,
hl_matrix_select_rows
(
a
,
stride_
,
table
.
getData
(),
table
.
stride_
,
index
,
index
,
numSamples
,
tableSize
,
dim
);
numSamples
,
tableSize
,
dim
);
#endif
#endif
}
}
...
@@ -529,8 +527,8 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
...
@@ -529,8 +527,8 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
size_t
tableSize
=
table
.
getHeight
();
size_t
tableSize
=
table
.
getHeight
();
int
*
index
=
ids
.
getData
();
int
*
index
=
ids
.
getData
();
hl_matrix_add_to_rows
(
table
.
getData
(),
table
.
stride_
,
a
,
stride_
,
hl_matrix_add_to_rows
(
table
.
getData
(),
table
.
stride_
,
a
,
stride_
,
index
,
index
,
numSamples
,
tableSize
,
dim
);
numSamples
,
tableSize
,
dim
);
#endif
#endif
}
}
...
@@ -565,13 +563,8 @@ void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
...
@@ -565,13 +563,8 @@ void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
CHECK_EQ
(
maxIds
.
getSize
(),
numSamples
*
beam
);
CHECK_EQ
(
maxIds
.
getSize
(),
numSamples
*
beam
);
CHECK_EQ
(
maxVal
.
getHeight
(),
numSamples
);
CHECK_EQ
(
maxVal
.
getHeight
(),
numSamples
);
hl_matrix_top_k
(
maxVal
.
getData
(),
hl_matrix_top_k
(
maxVal
.
getData
(),
maxVal
.
getStride
(),
maxIds
.
getData
(),
maxVal
.
getStride
(),
this
->
getData
(),
this
->
getStride
(),
this
->
getWidth
(),
beam
,
maxIds
.
getData
(),
this
->
getData
(),
this
->
getStride
(),
this
->
getWidth
(),
beam
,
numSamples
);
numSamples
);
#endif
#endif
}
}
...
@@ -595,12 +588,12 @@ void GpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels,
...
@@ -595,12 +588,12 @@ void GpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels,
size_t
size
=
getWidth
();
size_t
size
=
getWidth
();
size_t
batchSize
=
getHeight
();
size_t
batchSize
=
getHeight
();
const
real
*
input
=
a
.
getData
();
const
real
*
input
=
a
.
getData
();
real
*
output
=
getData
();
real
*
output
=
getData
();
int
*
idForGpu
=
id
.
getData
();
int
*
idForGpu
=
id
.
getData
();
hl_maxout_forward
(
input
,
output
,
idForGpu
,
batchSize
,
size
,
hl_maxout_forward
(
input
,
output
,
idForGpu
,
batchSize
,
size
,
size
/
channels
,
size
/
channels
,
groups
);
groups
);
}
}
void
GpuMatrix
::
maxoutBackward
(
Matrix
&
a
,
IVector
&
id
,
size_t
channels
,
void
GpuMatrix
::
maxoutBackward
(
Matrix
&
a
,
IVector
&
id
,
size_t
channels
,
...
@@ -611,12 +604,12 @@ void GpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels,
...
@@ -611,12 +604,12 @@ void GpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels,
size_t
size
=
a
.
getWidth
();
size_t
size
=
a
.
getWidth
();
size_t
batchSize
=
getHeight
();
size_t
batchSize
=
getHeight
();
real
*
input
=
getData
();
real
*
input
=
getData
();
const
real
*
output
=
a
.
getData
();
const
real
*
output
=
a
.
getData
();
const
int
*
idForGpu
=
id
.
getData
();
const
int
*
idForGpu
=
id
.
getData
();
hl_maxout_backward
(
input
,
output
,
idForGpu
,
batchSize
,
size
,
hl_maxout_backward
(
input
,
output
,
idForGpu
,
batchSize
,
size
,
size
/
channels
,
size
/
channels
,
groups
);
groups
);
}
}
/*calulate the error of classification */
/*calulate the error of classification */
...
@@ -632,8 +625,8 @@ void GpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) {
...
@@ -632,8 +625,8 @@ void GpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) {
real
*
recResult_d
=
data_
;
real
*
recResult_d
=
data_
;
int
*
label_d
=
label_ptr
->
getData
();
int
*
label_d
=
label_ptr
->
getData
();
hl_matrix_classification_error
(
output_d
,
label_d
,
recResult_d
,
hl_matrix_classification_error
(
output_d
,
label_d
,
recResult_d
,
height_
,
height_
,
output_ptr
->
width_
);
output_ptr
->
width_
);
}
}
/* copy -log(output[i * width + label]) to this->data[i] */
/* copy -log(output[i * width + label]) to this->data[i] */
...
@@ -702,8 +695,7 @@ void GpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) {
...
@@ -702,8 +695,7 @@ void GpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) {
real
*
outputData
=
output
.
getData
();
real
*
outputData
=
output
.
getData
();
auto
starts
=
index
.
getData
();
auto
starts
=
index
.
getData
();
int
numSequences
=
index
.
getSize
()
-
1
;
int
numSequences
=
index
.
getSize
()
-
1
;
hl_sequence_softmax_forward
(
inputData
,
outputData
,
hl_sequence_softmax_forward
(
inputData
,
outputData
,
starts
,
numSequences
);
starts
,
numSequences
);
}
}
void
GpuMatrix
::
softmaxDerivative
(
Matrix
&
output
,
Matrix
&
sftmaxSum
)
{
void
GpuMatrix
::
softmaxDerivative
(
Matrix
&
output
,
Matrix
&
sftmaxSum
)
{
...
@@ -717,8 +709,7 @@ void GpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) {
...
@@ -717,8 +709,7 @@ void GpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) {
real
*
output_d
=
output
.
data_
;
real
*
output_d
=
output
.
data_
;
real
*
sftmaxSum_d
=
sftmaxSum
.
data_
;
real
*
sftmaxSum_d
=
sftmaxSum
.
data_
;
real
*
grad_d
=
data_
;
real
*
grad_d
=
data_
;
hl_matrix_softmax_derivative
(
grad_d
,
output_d
,
sftmaxSum_d
,
height_
,
hl_matrix_softmax_derivative
(
grad_d
,
output_d
,
sftmaxSum_d
,
height_
,
width_
);
width_
);
}
}
void
GpuMatrix
::
softmaxBackward
(
Matrix
&
outputV
)
{
void
GpuMatrix
::
softmaxBackward
(
Matrix
&
outputV
)
{
...
@@ -769,7 +760,7 @@ void GpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
...
@@ -769,7 +760,7 @@ void GpuMatrix::scaledTanh(Matrix& output, real p1, real p2) {
}
}
void
GpuMatrix
::
cosSim
(
Matrix
&
output1
,
Matrix
&
output2
,
real
scale
)
{
void
GpuMatrix
::
cosSim
(
Matrix
&
output1
,
Matrix
&
output2
,
real
scale
)
{
CHECK
(
output1
.
useGpu_
==
true
&&
output2
.
useGpu_
==
true
)
CHECK
(
output1
.
useGpu_
==
true
&&
output2
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
<<
"Matrix type are not equal"
;
size_t
numSamples
=
getHeight
();
size_t
numSamples
=
getHeight
();
size_t
dim
=
output1
.
getWidth
();
size_t
dim
=
output1
.
getWidth
();
CHECK_EQ
(
getWidth
(),
1UL
);
CHECK_EQ
(
getWidth
(),
1UL
);
...
@@ -778,15 +769,15 @@ void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) {
...
@@ -778,15 +769,15 @@ void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) {
real
*
out
=
getData
();
real
*
out
=
getData
();
real
*
x
=
output1
.
getData
();
real
*
x
=
output1
.
getData
();
real
*
y
=
output2
.
getData
();
real
*
y
=
output2
.
getData
();
hl_cossim
(
out
,
x
,
y
,
hl_cossim
(
out
,
x
,
y
,
dim
,
output1
.
getHeight
(),
output2
.
getHeight
(),
scale
);
dim
,
output1
.
getHeight
(),
output2
.
getHeight
(),
scale
);
}
}
void
GpuMatrix
::
cosSimDerivative
(
Matrix
&
output
,
Matrix
&
prevOut1
,
void
GpuMatrix
::
cosSimDerivative
(
Matrix
&
output
,
Matrix
&
prevOut1
,
Matrix
&
prevOut2
,
Matrix
&
prevGrad1
,
Matrix
&
prevOut2
,
Matrix
&
prevGrad1
,
Matrix
&
prevGrad2
,
real
scale
)
{
Matrix
&
prevGrad2
,
real
scale
)
{
CHECK
(
output
.
useGpu_
==
true
&&
prevOut1
.
useGpu_
==
true
&&
CHECK
(
output
.
useGpu_
==
true
&&
prevOut1
.
useGpu_
==
true
&&
prevOut2
.
useGpu_
==
true
&&
prevGrad1
.
useGpu_
==
true
&&
prevOut2
.
useGpu_
==
true
&&
prevGrad1
.
useGpu_
==
true
&&
prevGrad2
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
prevGrad2
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
CHECK_EQ
(
getWidth
(),
1UL
);
CHECK_EQ
(
getWidth
(),
1UL
);
CHECK_EQ
(
output
.
getWidth
(),
1UL
);
CHECK_EQ
(
output
.
getWidth
(),
1UL
);
...
@@ -806,9 +797,8 @@ void GpuMatrix::cosSimDerivative(Matrix& output, Matrix& prevOut1,
...
@@ -806,9 +797,8 @@ void GpuMatrix::cosSimDerivative(Matrix& output, Matrix& prevOut1,
real
*
prevOutY
=
prevOut2
.
getData
();
real
*
prevOutY
=
prevOut2
.
getData
();
real
*
prevGradX
=
prevGrad1
.
getData
();
real
*
prevGradX
=
prevGrad1
.
getData
();
real
*
prevGradY
=
prevGrad2
.
getData
();
real
*
prevGradY
=
prevGrad2
.
getData
();
hl_cossim_derivative
(
grad
,
out
,
prevOutX
,
prevOutY
,
hl_cossim_derivative
(
grad
,
out
,
prevOutX
,
prevOutY
,
prevGradX
,
prevGradY
,
dim
,
prevGradX
,
prevGradY
,
dim
,
prevOut1
.
getHeight
(),
prevOut2
.
getHeight
(),
scale
);
prevOut1
.
getHeight
(),
prevOut2
.
getHeight
(),
scale
);
}
}
void
GpuMatrix
::
randomizeUniform
()
{
void
GpuMatrix
::
randomizeUniform
()
{
...
@@ -859,8 +849,8 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) {
...
@@ -859,8 +849,8 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) {
void
GpuMatrix
::
convExpand
(
Matrix
&
feature
,
int
feaImgHeight
,
int
feaImgWidth
,
void
GpuMatrix
::
convExpand
(
Matrix
&
feature
,
int
feaImgHeight
,
int
feaImgWidth
,
int
channels
,
int
blockH
,
int
blockW
,
int
strideH
,
int
channels
,
int
blockH
,
int
blockW
,
int
strideH
,
int
strideW
,
int
paddingH
,
int
paddingW
,
int
strideW
,
int
paddingH
,
int
paddingW
,
int
outputH
,
int
output
H
,
int
output
W
)
{
int
outputW
)
{
CHECK
(
feature
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
CHECK
(
feature
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
CHECK_EQ
(
size_t
(
feaImgHeight
*
feaImgWidth
*
channels
),
CHECK_EQ
(
size_t
(
feaImgHeight
*
feaImgWidth
*
channels
),
...
@@ -870,17 +860,16 @@ void GpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
...
@@ -870,17 +860,16 @@ void GpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
size_t
elemCnt
=
outputH
*
outputW
*
blockH
*
blockW
*
channels
;
size_t
elemCnt
=
outputH
*
outputW
*
blockH
*
blockW
*
channels
;
CHECK_EQ
(
elemCnt
,
height_
*
width_
)
<<
"Matrix dimensions are not equal"
;
CHECK_EQ
(
elemCnt
,
height_
*
width_
)
<<
"Matrix dimensions are not equal"
;
hl_expand_feature2col
(
feature
.
getData
(),
channels
,
feaImgHeight
,
hl_expand_feature2col
(
feature
.
getData
(),
channels
,
feaImgHeight
,
feaImgWidth
,
feaImgWidth
,
blockH
,
blockW
,
strideH
,
strideW
,
blockH
,
blockW
,
strideH
,
strideW
,
paddingH
,
paddingW
,
paddingH
,
paddingW
,
outputH
,
outputW
,
outputH
,
outputW
,
getData
());
getData
());
}
}
void
GpuMatrix
::
convShrink
(
Matrix
&
expandFeat
,
int
thisImgHeight
,
void
GpuMatrix
::
convShrink
(
Matrix
&
expandFeat
,
int
thisImgHeight
,
int
thisImgWidth
,
int
channels
,
int
blockH
,
int
thisImgWidth
,
int
channels
,
int
blockH
,
int
blockW
,
int
strideH
,
int
strideW
,
int
paddingH
,
int
blockW
,
int
strideH
,
int
strideW
,
int
paddingH
,
int
paddingW
,
int
outputH
,
int
outputW
,
int
paddingW
,
int
outputH
,
int
outputW
,
real
alpha
,
real
alpha
,
real
beta
)
{
real
beta
)
{
CHECK
(
expandFeat
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
CHECK
(
expandFeat
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
CHECK_EQ
(
size_t
(
thisImgHeight
*
thisImgWidth
*
channels
),
CHECK_EQ
(
size_t
(
thisImgHeight
*
thisImgWidth
*
channels
),
getHeight
()
*
getWidth
())
getHeight
()
*
getWidth
())
...
@@ -889,18 +878,17 @@ void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
...
@@ -889,18 +878,17 @@ void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
size_t
elemCnt
=
outputH
*
outputW
*
blockW
*
blockH
*
channels
;
size_t
elemCnt
=
outputH
*
outputW
*
blockW
*
blockH
*
channels
;
CHECK
(
elemCnt
==
expandFeat
.
getHeight
()
*
expandFeat
.
getWidth
())
CHECK
(
elemCnt
==
expandFeat
.
getHeight
()
*
expandFeat
.
getWidth
())
<<
"Matrix dimensions are not equal"
;
<<
"Matrix dimensions are not equal"
;
hl_shrink_col2feature
(
hl_shrink_col2feature
(
expandFeat
.
getData
(),
channels
,
thisImgHeight
,
expandFeat
.
getData
(),
channels
,
thisImgHeight
,
thisImgWidth
,
blockH
,
thisImgWidth
,
blockH
,
blockW
,
strideH
,
strideW
,
blockW
,
strideH
,
strideW
,
paddingH
,
paddingW
,
outputH
,
outputW
,
paddingH
,
paddingW
,
outputH
,
outputW
,
getData
(),
alpha
,
getData
(),
alpha
,
beta
);
beta
);
}
}
void
GpuMatrix
::
maxPoolForward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
void
GpuMatrix
::
maxPoolForward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
size_t
imgSizeW
,
size_t
channels
,
size_t
imgSizeW
,
size_t
channels
,
size_t
sizeX
,
size_t
sizeX
,
size_t
sizeY
,
size_t
sizeY
,
size_t
strideH
,
size_t
strideW
,
size_t
strideH
,
size_t
strideW
,
size_t
outputH
,
size_t
outputW
,
size_t
paddingH
,
size_t
outputH
,
size_t
outputW
,
size_t
paddingW
)
{
size_t
paddingH
,
size_t
paddingW
)
{
CHECK
(
inputMat
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
CHECK
(
inputMat
.
useGpu_
==
true
)
<<
"Matrix type are not equal"
;
real
*
inputData
=
inputMat
.
getData
();
real
*
inputData
=
inputMat
.
getData
();
...
@@ -911,16 +899,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
...
@@ -911,16 +899,15 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
CHECK
(
height_
==
inputMat
.
getHeight
());
CHECK
(
height_
==
inputMat
.
getHeight
());
CHECK
(
width_
==
outputH
*
outputW
*
channels
);
CHECK
(
width_
==
outputH
*
outputW
*
channels
);
hl_maxpool_forward
(
frameNum
,
inputData
,
channels
,
height
,
width
,
hl_maxpool_forward
(
frameNum
,
inputData
,
channels
,
height
,
width
,
outputH
,
output
H
,
outputW
,
sizeX
,
sizeY
,
strideH
,
strideW
,
output
W
,
sizeX
,
sizeY
,
strideH
,
strideW
,
paddingH
,
padding
H
,
paddingW
,
data_
);
padding
W
,
data_
,
getStride
()
);
}
}
void
GpuMatrix
::
maxPoolBackward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
void
GpuMatrix
::
maxPoolBackward
(
Matrix
&
inputMat
,
size_t
imgSizeH
,
size_t
imgSizeW
,
Matrix
&
outGrad
,
Matrix
&
outV
,
size_t
imgSizeW
,
Matrix
&
outGrad
,
Matrix
&
outV
,
size_t
sizeX
,
size_t
sizeY
,
size_t
sizeX
,
size_t
sizeY
,
size_t
strideH
,
size_t
strideH
,
size_t
strideW
,
size_t
strideW
,
size_t
outputH
,
size_t
outputW
,
size_t
outputH
,
size_t
outputW
,
real
scaleTargets
,
real
scaleOutput
,
real
scaleTargets
,
real
scaleOutput
,
size_t
paddingH
,
size_t
paddingW
)
{
size_t
paddingH
,
size_t
paddingW
)
{
CHECK
(
inputMat
.
useGpu_
==
true
&&
outGrad
.
useGpu_
==
true
&&
CHECK
(
inputMat
.
useGpu_
==
true
&&
outGrad
.
useGpu_
==
true
&&
...
@@ -940,19 +927,17 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH,
...
@@ -940,19 +927,17 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH,
CHECK
(
outGrad
.
getHeight
()
==
outV
.
getHeight
()
&&
CHECK
(
outGrad
.
getHeight
()
==
outV
.
getHeight
()
&&
outGrad
.
getWidth
()
==
outV
.
getWidth
());
outGrad
.
getWidth
()
==
outV
.
getWidth
());
hl_maxpool_backward
(
frameNum
,
inputData
,
outData
,
outDiff
,
channels
,
height
,
hl_maxpool_backward
(
frameNum
,
inputData
,
outData
,
outDiff
,
channels
,
width
,
outputH
,
outputW
,
sizeX
,
sizeY
,
strideH
,
strideW
,
height
,
width
,
outputH
,
outputW
,
sizeX
,
sizeY
,
paddingH
,
paddingW
,
scaleTargets
,
scaleOutput
,
data_
,
strideH
,
strideW
,
paddingH
,
paddingW
,
outGrad
.
getStride
());
scaleTargets
,
scaleOutput
,
data_
);
}
}
 void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH,
                                size_t imgSizeW, size_t channels,
                                size_t sizeX, size_t sizeY,
                                size_t strideH, size_t strideW,
                                size_t outputH, size_t outputW,
                                size_t paddingH, size_t paddingW) {
   CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";

   real* inputData = inputMat.getData();
...
@@ -963,18 +948,17 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH,
   CHECK(height_ == inputMat.getHeight());
   CHECK(width_ == outputH * outputW * channels);

   hl_avgpool_forward(frameNum, inputData, channels, height, width,
                      outputH, outputW, sizeX, sizeY,
                      strideH, strideW, paddingH,
-                     paddingW, data_);
+                     paddingW, data_, getStride());
 }
 void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH,
                                 size_t imgSizeW, size_t sizeX, size_t sizeY,
                                 size_t strideH, size_t strideW,
                                 size_t outputH, size_t outputW,
                                 real scaleTargets, real scaleOutput,
                                 size_t paddingH, size_t paddingW) {
   CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal";

   real* outDiff = outGrad.getData();
...
@@ -986,11 +970,10 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH,
   CHECK(height_ == outGrad.getHeight());
   CHECK(outGrad.getWidth() == outputH * outputW * channels);

   hl_avgpool_backward(frameNum, outDiff, channels, height, width,
                       outputH, outputW, sizeX, sizeY,
                       strideH, strideW, paddingH, paddingW,
-                      scaleTargets, scaleOutput, data_);
+                      scaleTargets, scaleOutput, data_, outGrad.getStride());
 }
 void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH,
...
@@ -1005,8 +988,8 @@ void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH,
   CHECK(denoms.getHeight() == input.getHeight() &&
         denoms.getWidth() == input.getWidth() && input.getHeight() == height_ &&
         input.getWidth() == width_);

   hl_CMRNorm_forward(num, input.getData(), denoms.getData(), data_,
                      channels, height, width, sizeX, scale, -pow);
 }

 void GpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms,
...
@@ -1026,13 +1009,11 @@ void GpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms,
         denoms.getWidth() == localGrad.getWidth());

   hl_CMRNorm_backward(num, preOutV.getData(), denoms.getData(),
                       localOutV.getData(), localGrad.getData(), data_,
                       channels, height, width, sizeX, -pow,
                       2.0f * pow * scale);
 }
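For reference, the cross-map (local response) normalization these kernels implement computes out = in * denom^(-pow) with denom = 1 + scale * (sum of squared activations over sizeX neighbouring channels), which is why -pow is what gets passed down. A minimal sketch of the per-element formula (names are illustrative; the exact normalization of scale may differ):

#include <cmath>

// Per-element cross-map normalization: `sumSquares` is the sum of squared
// activations over the sizeX neighbouring channels at the same pixel.
float crossMapNormalize(float in, float sumSquares, float scale, float p) {
  float denom = 1.0f + scale * sumSquares;  // the "denoms" buffer above
  return in * std::pow(denom, -p);          // kernels receive -p directly
}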
 void GpuMatrix::maxSequenceForward(Matrix& input,
                                    const IVector& sequence,
                                    IVector& index) {
   CHECK(dynamic_cast<GpuMatrix*>(&input));
   CHECK(dynamic_cast<const GpuIVector*>(&sequence));
...
@@ -1049,12 +1030,11 @@ void GpuMatrix::maxSequenceForward(Matrix& input,
   CHECK_EQ(numSequences, sequence.getSize() - 1);
   CHECK_EQ(numSequences * dim, index.getSize());

   hl_max_sequence_forward(inputData, starts, outData, maxIndex,
                           numSequences, dim);
 }

 void GpuMatrix::maxSequenceBackward(Matrix& outputGrad,
                                     const IVector& sequence,
                                     IVector& index) {
   CHECK(dynamic_cast<GpuMatrix*>(&outputGrad));
   CHECK(dynamic_cast<const GpuIVector*>(&sequence));
...
@@ -1111,9 +1091,8 @@ void GpuMatrix::contextProjectionBackwardData(MatrixPtr inputGrad,
   real* inGrad = inputGrad->getData();
   const int* starts = sequence.getData();

   hl_context_projection_backward_data(outGrad, starts, inGrad, numSequences,
                                       inputDim, contextLength, contextStart);
 }

 void GpuMatrix::contextProjectionBackwardWeight(MatrixPtr weightGrad,
...
@@ -1133,9 +1112,9 @@ void GpuMatrix::contextProjectionBackwardWeight(MatrixPtr weightGrad,
   real* wtGrad = weightGrad->getData();
   const int* starts = sequence.getData();

   hl_context_projection_backward_weight(outGrad, starts, wtGrad, numSequences,
                                         weightDim, totalPad, contextLength,
                                         contextStart, beginPad);
 }
 void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) {
...
@@ -1147,8 +1126,7 @@ void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) {
   size_t numSamples = data.getHeight();
   size_t partial_sum = numElements / (W.getHeight() * W.getWidth());
   real* output = getData();

   hl_param_relu_forward(output, input, w, numElements, numSamples,
                         partial_sum);
 }

 void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) {
...
@@ -1160,8 +1138,8 @@ void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) {
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
   size_t partial_sum = numElements / (this->getHeight() * this->getWidth());

   hl_param_relu_backward_w(wgrad, ograd, input, numElements, numSamples,
                            partial_sum);
 }

 void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) {
...
@@ -1172,8 +1150,8 @@ void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) {
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
   size_t partial_sum = numElements / (W.getHeight() * W.getWidth());

   hl_param_relu_backward_diff(ograd, input, w, diff, numElements, numSamples,
                               partial_sum);
 }

 void GpuMatrix::addColumnVector(const Matrix& b) {
...
@@ -1422,8 +1400,8 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
 void CpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
                            int channels, int blockH, int blockW, int strideH,
                            int strideW, int paddingH, int paddingW,
                            int outputH, int outputW) {
   CHECK(feature.useGpu_ == false) << "Matrix type are not equal";
   CHECK_EQ(size_t(feaImgHeight * feaImgWidth * channels),
...
@@ -1463,8 +1441,8 @@ void CpuMatrix::convExpand(Matrix& feature, int feaImgHeight, int feaImgWidth,
 void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
                            int thisImgWidth, int channels, int blockH,
                            int blockW, int strideH, int strideW, int paddingH,
                            int paddingW, int outputH, int outputW,
                            real alpha, real beta) {
   CHECK(expandFeat.useGpu_ == false) << "Matrix type are not equal";
   CHECK_EQ(size_t(thisImgHeight * thisImgWidth * channels),
            getHeight() * getWidth())
...
@@ -1501,11 +1479,10 @@ void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
 }

 void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
                                size_t imgSizeW, size_t channels,
                                size_t sizeX, size_t sizeY,
                                size_t strideH, size_t strideW,
                                size_t outputH, size_t outputW,
                                size_t paddingH, size_t paddingW) {
   real* inputData = inputMat.getData();
   real* outData = data_;
   size_t num = inputMat.getHeight();
...
@@ -1513,15 +1490,20 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
   size_t inHeight = imgSizeH;
   CHECK(inHeight * inWidth == inputMat.getWidth() / channels);
   CHECK_EQ(num, this->getHeight());
   CHECK_EQ(channels * outputH * outputW, this->getWidth());

   /* initialize the data_ */
-  for (size_t i = 0; i < height_ * width_; i++) {
-    outData[i] = -(real)FLT_MAX;
+  for (size_t i = 0; i < height_; i++) {
+    for (size_t j = 0; j < width_; j++) {
+      outData[i * getStride() + j] = -(real)FLT_MAX;
+    }
   }

   /* pool max one by one */
   for (size_t n = 0; n < num; ++n) {  // frame by frame
+    if (!isContiguous()) {
+      outData = data_ + n * getStride();
+    }
     for (size_t c = 0; c < channels; ++c) {  // channel by channel
       for (size_t ph = 0; ph < outputH; ++ph) {
         for (size_t pw = 0; pw < outputW; ++pw) {
...
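The recurring change in these CPU loops: when the matrix is a non-contiguous view (as each pyramid level of an SPP output is), frame n's row starts at data_ + n * getStride() rather than at data_ + n * width_, so the row pointer is recomputed per frame and the initialization loop must skip the gap between rows. A self-contained sketch of the same initialization idiom, with illustrative names:

#include <cfloat>
#include <cstddef>
#include <vector>

// Fill each frame's output block with -FLT_MAX (the identity element for
// max-pooling) inside a wider backing buffer; `stride` is the full row
// length of the backing matrix, >= blockWidth for a non-contiguous view.
void initPoolOutput(std::vector<float>& buf, std::size_t numFrames,
                    std::size_t blockWidth, std::size_t stride) {
  for (std::size_t n = 0; n < numFrames; ++n) {
    float* out = buf.data() + n * stride;  // frame n's row start
    for (std::size_t j = 0; j < blockWidth; ++j) {
      out[j] = -FLT_MAX;
    }
  }
}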
@@ -1564,6 +1546,10 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
   real* otData = outV.getData();
   real* otGrad = outGrad.getData();

   for (size_t n = 0; n < num; ++n) {
+    if (!outV.isContiguous()) {
+      otData = outV.getData() + n * outV.getStride();
+      otGrad = outGrad.getData() + n * outGrad.getStride();
+    }
     for (size_t c = 0; c < channels; ++c) {
       for (size_t ph = 0; ph < outputH; ++ph) {
         for (size_t pw = 0; pw < outputW; ++pw) {
...
@@ -1594,9 +1580,9 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
 void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
                                size_t channels, size_t sizeX, size_t sizeY,
                                size_t strideH, size_t strideW,
                                size_t outputH, size_t outputW,
                                size_t paddingH, size_t paddingW) {
   // The main loop
   size_t num = input.getHeight();
   size_t inHeight = imgSizeH;
...
@@ -1607,6 +1593,9 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
   real* inData = input.getData();

   for (size_t n = 0; n < num; ++n) {
+    if (!isContiguous()) {
+      tgtData = data_ + n * getStride();
+    }
     for (size_t c = 0; c < channels; ++c) {
       for (size_t ph = 0; ph < outputH; ++ph) {
         for (size_t pw = 0; pw < outputW; ++pw) {
...
@@ -1638,9 +1627,8 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
 }

 void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
                                 size_t sizeX, size_t sizeY,
                                 size_t strideH, size_t strideW,
                                 size_t outputH, size_t outputW,
                                 real scaleTargets, real scaleOutput,
                                 size_t paddingH, size_t paddingW) {
   size_t num = input.getHeight();
...
@@ -1650,6 +1638,9 @@ void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
   real* outData = getData();

   for (size_t n = 0; n < num; ++n) {
+    if (!input.isContiguous()) {
+      inData = input.getData() + n * input.getStride();
+    }
     for (size_t c = 0; c < channels; ++c) {
       for (size_t ph = 0; ph < outputH; ++ph) {
         for (size_t pw = 0; pw < outputW; ++pw) {
...
@@ -1752,8 +1743,7 @@ void CpuMatrix::crossMapNormalBwd(Matrix& localGrad, Matrix& denoms,
  * Output: output size is the number of input sequences (NOT input instances).
  * output[i] is set to max_{for each instance in this sequence}{input[i]}
  */
 void CpuMatrix::maxSequenceForward(Matrix& input,
                                    const IVector& sequence,
                                    IVector& index) {
   CHECK(dynamic_cast<CpuMatrix*>(&input));
   CHECK(dynamic_cast<const CpuIVector*>(&sequence));
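To make the comment above concrete: with sequence start offsets [0, 3, 5] there are two sequences, so output row 0 is the elementwise max of input rows 0-2 and output row 1 the elementwise max of rows 3-4. A minimal sketch of that reduction over row-major data (names are illustrative, and the bookkeeping that records argmax positions into `index` is omitted):

#include <algorithm>
#include <cstddef>

// Elementwise max over the instances (rows) of each sequence. `starts` has
// numSeqs + 1 entries; sequence s spans input rows [starts[s], starts[s+1]).
void maxSequence(const float* in, const int* starts, std::size_t numSeqs,
                 std::size_t dim, float* out) {
  for (std::size_t s = 0; s < numSeqs; ++s) {
    for (std::size_t d = 0; d < dim; ++d) {
      float m = in[starts[s] * dim + d];
      for (int r = starts[s] + 1; r < starts[s + 1]; ++r) {
        m = std::max(m, in[r * dim + d]);
      }
      out[s * dim + d] = m;
    }
  }
}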
...
@@ -1794,8 +1784,7 @@ void CpuMatrix::maxSequenceForward(Matrix& input,
   }
 }

 void CpuMatrix::maxSequenceBackward(Matrix& outputGrad,
                                     const IVector& sequence,
                                     IVector& index) {
   CHECK(dynamic_cast<CpuMatrix*>(&outputGrad));
   CHECK(dynamic_cast<const CpuIVector*>(&sequence));
...
@@ -2000,8 +1989,7 @@ void CpuMatrix::collectBias(Matrix& a, real scale) {
   }
 }

 void CpuMatrix::sequenceAvgForward(Matrix& a,
                                    const IVector& startsPos,
                                    int mode) {
   size_t height = getHeight();
   size_t width = getWidth();
...
@@ -2592,7 +2580,7 @@ void SharedCpuMatrix::mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB,
       blockSeq.push_back(k);
     }
     std::shuffle(blockSeq.begin(), blockSeq.end(),
                  ThreadLocalRandomEngine::get());
   }
   std::vector<int>& localBufRows = *localBufRows_;
   int* cols = a->getCols();
...
@@ -2823,7 +2811,7 @@ void CpuMatrix::maxoutForward(Matrix& a, IVector& id, size_t channels,
   size_t size = getWidth();
   size_t batchSize = getHeight();
   size_t featLen = size / channels;
   const real* input = a.getData();
   int* idForCpu = id.getData();

   MatrixPtr maxInMat, maxOutMat;
...
@@ -2857,8 +2845,8 @@ void CpuMatrix::maxoutBackward(Matrix& a, IVector& id, size_t channels,
   size_t batchSize = getHeight();
   size_t featLen = size / channels;
   size_t newFeatLen = groups * featLen;
   real* inputG = getData();
   const real* outG = a.getData();
   int* idForCpu = id.getData();

   for (size_t batch_idx = 0; batch_idx < batchSize; ++batch_idx) {
...
@@ -3082,9 +3070,9 @@ void CpuMatrix::sequenceSoftmax(Matrix& output, const IVector& index) {
   CHECK(isContiguous());

   MatrixPtr inTmp = Matrix::create(nullptr, /* height= */ 1, 1,
                                    /* trans= */ false, false);
   MatrixPtr outTmp = Matrix::create(nullptr, /* height= */ 1, 1,
                                     /* trans= */ false, false);
   size_t numSequences = index.getSize() - 1;
   auto starts = index.getData();
   for (size_t i = 0; i < numSequences; ++i) {
...
proto/ModelConfig.proto.m4
...
@@ -120,6 +120,14 @@ message PoolConfig {
   optional uint32 padding_y = 13 [default = 0];
 }

+message SppConfig {
+  required string pool_type = 1;
+  required uint32 pyramid_height = 2;
+  required uint32 channels = 3;
+  required uint32 img_size = 4;
+  optional uint32 img_size_y = 5;
+}
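The SppConfig message carries everything the C++ layer needs to size its pyramid. Assuming the usual SPP scheme (He et al.), where pyramid level i pools the feature map into 2^i x 2^i bins, the layer's output width follows directly from pyramid_height and channels; this arithmetic is an inference from the SPP paper, not code shown in this diff:

#include <cstddef>

// Output width of an SPP layer, assuming level i yields 2^i x 2^i bins.
// E.g. pyramid_height = 3 gives channels * (1 + 4 + 16) output columns.
std::size_t sppOutputSize(std::size_t channels, std::size_t pyramidHeight) {
  std::size_t total = 0;
  for (std::size_t i = 0; i < pyramidHeight; ++i) {
    std::size_t bins = std::size_t(1) << i;  // 2^i bins per side
    total += bins * bins;                    // bins in this pyramid level
  }
  return channels * total;
}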
 message NormConfig {
   // rnorm or cmrnorm
   required string norm_type = 1;
...
@@ -194,6 +202,9 @@ message ProjectionConfig {
   optional ConvConfig conv_conf = 8;
   optional int32 num_filters = 9;

+  // For pool
+  optional PoolConfig pool_conf = 10;
+
   // For IdentityOffsetProjection
   optional uint64 offset = 11 [default = 0];
 }
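The new pool_conf field lets a ProjectionConfig carry a full PoolConfig, which is how each pyramid level of the SPP layer can be expressed as a PoolProjection. A hypothetical sketch of filling it through the generated protobuf API (the projection type tag and pool-type string are assumptions, not confirmed by this diff):

#include <string>

// Illustrative only: build a ProjectionConfig for one pooling level.
// ProjectionConfig and PoolConfig are the protobuf messages from this diff.
ProjectionConfig makePoolProjectionConf(const std::string& poolType,
                                        int channels, int imgSize) {
  ProjectionConfig conf;
  conf.set_type("pool");                // assumed projection type tag
  PoolConfig* pool = conf.mutable_pool_conf();
  pool->set_pool_type(poolType);        // e.g. "max-projection" (assumed)
  pool->set_channels(channels);
  pool->set_img_size(imgSize);
  return conf;
}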
...
@@ -235,6 +246,7 @@ message LayerInputConfig {
   // Set the argument name.
   optional string input_layer_argument = 9;
   optional MaxOutConfig maxout_conf = 10;
+  optional SppConfig spp_conf = 11;
 }

 message LayerConfig {
...