Commit be3e2764
Authored on Dec 29, 2016 by tianbingsz; committed via GitHub on Dec 29, 2016
Parents: 54a2b1f6, ec6b13db

Merge pull request #1009 from tianbingsz/paddle_func_mat

add paddle functions for Matrix ContextProjection APIs

Showing 17 changed files with 1,201 additions and 800 deletions (+1201 −800)
Changed files:
  paddle/cuda/include/hl_sequence.h                +0    −72
  paddle/cuda/include/stub/hl_sequence_stub.h      +0    −29
  paddle/cuda/src/hl_cuda_sequence.cu              +0    −252
  paddle/function/CMakeLists.txt                   +3    −0
  paddle/function/ContextProjectionOp.cpp          +373  −0
  paddle/function/ContextProjectionOp.h            +85   −0
  paddle/function/ContextProjectionOpGpu.cu        +401  −0
  paddle/function/ContextProjectionOpTest.cpp      +172  −0
  paddle/function/Function.cpp                     +28   −0
  paddle/function/Function.h                       +15   −0
  paddle/function/FunctionTest.h                   +28   −16
  paddle/gserver/layers/ContextProjection.cpp      +64   −48
  paddle/gserver/layers/ContextProjection.h        +2    −0
  paddle/gserver/layers/Projection.h               +30   −0
  paddle/math/Matrix.cpp                           +0    −169
  paddle/math/Matrix.h                             +0    −72
  paddle/math/tests/test_matrixCompare.cpp         +0    −142
paddle/cuda/include/hl_sequence.h  (+0 −72, declarations removed)

@@ -48,78 +48,6 @@ extern void hl_max_sequence_forward(real* input,
extern void hl_max_sequence_backward(
    real* outputGrad, int* index, real* inputGrad, int numSequences, int dim);

/**
 * @brief   Context projection forward.
 *
 * @param[in]   input           input sequence.
 * @param[in]   sequence        sequence index.
 * @param[in]   weightData      padding data.
 * @param[out]  output          output sequence.
 * @param[in]   numSequences    number of sequences.
 * @param[in]   inputDim        input sequence dimension.
 * @param[in]   contextLength   context length.
 * @param[in]   contextStart    context start.
 * @param[in]   beginPad        number of extra timesteps added at the beginning.
 * @param[in]   isPadding       trainable padding.
 */
extern void hl_context_projection_forward(real* input,
                                          const int* sequence,
                                          real* weightData,
                                          real* output,
                                          int numSequences,
                                          int inputDim,
                                          int contextLength,
                                          int contextStart,
                                          int beginPad,
                                          bool isPadding);

/**
 * @brief   Context projection backward data.
 *
 * @param[in]   outputGrad      output gradient.
 * @param[in]   sequence        sequence index.
 * @param[out]  inputGrad       input gradient.
 * @param[in]   numSequences    number of sequences.
 * @param[in]   inputDim        input sequence dimension.
 * @param[in]   contextLength   context length.
 * @param[in]   contextStart    context start.
 */
extern void hl_context_projection_backward_data(real* outputGrad,
                                                const int* sequence,
                                                real* inputGrad,
                                                int numSequences,
                                                int inputDim,
                                                int contextLength,
                                                int contextStart);

/**
 * @brief   Context projection backward weight.
 *
 * @param[in]   outputGrad      output gradient.
 * @param[in]   sequence        sequence index.
 * @param[out]  weightGrad      weight gradient.
 * @param[in]   numSequences    number of sequences.
 * @param[in]   weightDim       input sequence dimension.
 * @param[in]   totalPad        number of extra timesteps.
 * @param[in]   contextLength   context length.
 * @param[in]   contextStart    context start.
 * @param[in]   beginPad        number of extra timesteps added at the beginning.
 */
extern void hl_context_projection_backward_weight(real* outputGrad,
                                                  const int* sequence,
                                                  real* weightGrad,
                                                  int numSequences,
                                                  int weightDim,
                                                  int totalPad,
                                                  int contextLength,
                                                  int contextStart,
                                                  int beginPad);

/**
 * @brief   Memory copy from sequence to batch.
 *
...
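For orientation, here is a minimal CPU-side sketch (not part of this commit) of what a context projection computes for one sequence: for each time step t, output row t is the concatenation of input rows t + context_start ... t + context_start + context_length − 1, with out-of-range positions left as zeros. The trainable-padding path, which instead reads those positions from a weight matrix, is omitted here; all names are illustrative.

#include <algorithm>
#include <vector>

// in:  rows x dim (row-major), one sequence
// out: rows x (dim * context_length), zero-padded at the sequence boundaries
void contextProjectionZeroPad(const std::vector<float>& in, int rows, int dim,
                              int context_start, int context_length,
                              std::vector<float>& out) {
  out.assign(static_cast<size_t>(rows) * dim * context_length, 0.0f);
  for (int t = 0; t < rows; ++t) {
    for (int j = 0; j < context_length; ++j) {
      int src = t + context_start + j;            // which input row fills slot j
      if (src < 0 || src >= rows) continue;       // boundary: leave zeros
      std::copy(&in[src * dim], &in[src * dim] + dim,
                &out[(static_cast<size_t>(t) * context_length + j) * dim]);
    }
  }
}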
paddle/cuda/include/stub/hl_sequence_stub.h  (+0 −29, stubs removed)

@@ -27,35 +27,6 @@ inline void hl_max_sequence_forward(real* input,
inline void hl_max_sequence_backward(
    real* outputGrad, int* index, real* inputGrad, int numSequences, int dim) {}

inline void hl_context_projection_forward(real* input,
                                          const int* sequence,
                                          real* weightData,
                                          real* output,
                                          int numSequences,
                                          int inputDim,
                                          int contextLength,
                                          int contextStart,
                                          int beginPad,
                                          bool isPadding) {}

inline void hl_context_projection_backward_data(real* outputGrad,
                                                const int* sequence,
                                                real* inputGrad,
                                                int numSequences,
                                                int inputDim,
                                                int contextLength,
                                                int contextStart) {}

inline void hl_context_projection_backward_weight(real* outputGrad,
                                                   const int* sequence,
                                                   real* weightGrad,
                                                   int numSequences,
                                                   int weightDim,
                                                   int totalPad,
                                                   int contextLength,
                                                   int contextStart,
                                                   int beginPad) {}

inline void hl_sequence2batch_copy(real* batch,
                                   real* sequence,
                                   const int* batchIndex,
...
paddle/cuda/src/hl_cuda_sequence.cu  (+0 −252, kernels removed)

@@ -90,258 +90,6 @@ void hl_max_sequence_backward(real* outputGrad,
  CHECK_SYNC("hl_max_sequence_backward failed");
}

template <bool padding>
__global__ void KeContextProjectionForward(real* input,
                                           const int* sequence,
                                           real* weightData,
                                           real* output,
                                           int inputDim,
                                           int contextLength,
                                           int contextStart,
                                           int beginPad) {
  int idx = threadIdx.x;
  int blockSize = blockDim.x;
  int sequenceId = blockIdx.x;
  int seqStart = sequence[sequenceId];
  int seqEnd = sequence[sequenceId + 1];
  real value = 0;

  int instances = seqEnd - seqStart + contextLength - 1;
  output += seqStart * inputDim * contextLength;
  input += seqStart * inputDim;
  for (int k = 0; k <= inputDim / blockSize; k++) {
    if (idx < inputDim) {
      for (int i = 0; i < instances; i++) {
        // i + contextStart;
        if ((i + contextStart) < 0) {
          if (padding) {
            value = weightData[i * inputDim + idx];
          } else {
            continue;
          }
        } else if ((i + contextStart) >= (seqEnd - seqStart)) {
          if (padding) {
            value = weightData[(beginPad + i + contextStart - (seqEnd - seqStart)) * inputDim + idx];
          } else {
            continue;
          }
        } else {
          value = input[(i + contextStart) * inputDim + idx];
        }

        int outx = (i - contextLength) < 0 ? i : (contextLength - 1);
        int outy = (i - contextLength) < 0 ? 0 : (i - (contextLength - 1));
        real* output_r = output + outy * inputDim * contextLength + outx * inputDim;
        for (int j = outy; j < seqEnd - seqStart; j++) {
          output_r[idx] += value;
          if (j - outy == outx) break;
          output_r += (contextLength - 1) * inputDim;
        }
      }
    }
    idx += blockSize;
  }
}

void hl_context_projection_forward(real* input,
                                   const int* sequence,
                                   real* weightData,
                                   real* output,
                                   int numSequences,
                                   int inputDim,
                                   int contextLength,
                                   int contextStart,
                                   int beginPad,
                                   bool isPadding) {
  CHECK_NOTNULL(input);
  CHECK_NOTNULL(sequence);
  CHECK_NOTNULL(output);
  CHECK(!isPadding || weightData);

  int blockSize = 128;
  int blocksX = numSequences;
  int blocksY = 1;
  dim3 threads(blockSize, 1);
  dim3 grid(blocksX, blocksY);

  if (isPadding) {
    KeContextProjectionForward<true><<<grid, threads, 0, STREAM_DEFAULT>>>(
        input, sequence, weightData, output, inputDim,
        contextLength, contextStart, beginPad);
  } else {
    KeContextProjectionForward<false><<<grid, threads, 0, STREAM_DEFAULT>>>(
        input, sequence, weightData, output, inputDim,
        contextLength, contextStart, beginPad);
  }
  CHECK_SYNC("hl_context_projection_forward failed");
}

__global__ void KeContextProjectionBackwardData(real* outputGrad,
                                                const int* sequence,
                                                real* inputGrad,
                                                int inputDim,
                                                int contextLength,
                                                int contextStart) {
  int idx = threadIdx.x;
  int blockSize = blockDim.x;
  int sequenceId = blockIdx.x;
  int seqStart = sequence[sequenceId];
  int seqEnd = sequence[sequenceId + 1];
  real value = 0;

  int instances = seqEnd - seqStart + contextLength - 1;
  outputGrad += seqStart * inputDim * contextLength;
  inputGrad += seqStart * inputDim;
  for (int k = 0; k <= inputDim / blockSize; k++) {
    if (idx < inputDim) {
      for (int i = 0; i < instances; i++) {
        if ((i + contextStart) < 0) {
          continue;
        } else if ((i + contextStart) >= (seqEnd - seqStart)) {
          continue;
        } else {
          // value = 0;
          value = inputGrad[(i + contextStart) * inputDim + idx];
        }

        int outx = (i - contextLength) < 0 ? i : (contextLength - 1);
        int outy = (i - contextLength) < 0 ? 0 : (i - (contextLength - 1));
        real* output_r = outputGrad + outy * inputDim * contextLength + outx * inputDim;
        for (int j = outy; j < seqEnd - seqStart; j++) {
          value += output_r[idx];
          if (j - outy == outx) break;
          output_r += (contextLength - 1) * inputDim;
        }
        inputGrad[(i + contextStart) * inputDim + idx] = value;
      }
    }
    idx += blockSize;
  }
}

void hl_context_projection_backward_data(real* outputGrad,
                                         const int* sequence,
                                         real* inputGrad,
                                         int numSequences,
                                         int inputDim,
                                         int contextLength,
                                         int contextStart) {
  CHECK_NOTNULL(outputGrad);
  CHECK_NOTNULL(sequence);
  CHECK_NOTNULL(inputGrad);

  int blockSize = 128;
  int blocksX = numSequences;
  int blocksY = 1;
  dim3 threads(blockSize, 1);
  dim3 grid(blocksX, blocksY);
  KeContextProjectionBackwardData<<<grid, threads, 0, STREAM_DEFAULT>>>(
      outputGrad, sequence, inputGrad, inputDim, contextLength, contextStart);
  CHECK_SYNC("hl_context_projection_backward_data failed");
}

template <int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* outputGrad,
                                                  const int* sequence,
                                                  real* weightGrad,
                                                  int numSequences,
                                                  int weightDim,
                                                  int contextLength,
                                                  int contextStart,
                                                  int beginPad) {
  __shared__ real sum_s[THREADS_Y][THREADS_X];
  int padOfBlock = (weightDim + THREADS_X - 1) / THREADS_X;
  const int idx = threadIdx.x;
  const int idy = threadIdx.y;
  int padId = blockIdx.x / padOfBlock;
  int weightIdx = idx + THREADS_X * (blockIdx.x % padOfBlock);
  int instanceId;
  real value = 0;
  real* output_r;

  sum_s[idy][idx] = 0.0f;
  if (weightIdx < weightDim) {
    for (int seqId = idy; seqId < numSequences; seqId += THREADS_Y) {
      int seqStart = sequence[seqId];
      int seqEnd = sequence[seqId + 1];
      output_r = outputGrad + seqStart * weightDim * contextLength;

      if (contextStart < 0) {
        if (padId + contextStart < 0) {
          instanceId = padId;
        } else {
          // beginPad > 0;
          instanceId = (padId - beginPad) + (seqEnd - seqStart) - contextStart;
        }
      } else {
        if (padId + (seqEnd - seqStart) < contextStart) {
          continue;
        } else {
          // beginPad == 0;
          instanceId = padId + (seqEnd - seqStart) - contextStart;
        }
      }

      int outx = (instanceId - contextLength) < 0 ? instanceId : (contextLength - 1);
      int outy = (instanceId - contextLength) < 0 ? 0 : (instanceId - (contextLength - 1));
      output_r += outy * weightDim * contextLength + outx * weightDim;
      for (int j = outy; j < seqEnd - seqStart; j++) {
        value += output_r[weightIdx];
        if (j - outy == outx) break;
        output_r += (contextLength - 1) * weightDim;
      }
    }
    sum_s[idy][idx] = value;
  }
  __syncthreads();

  for (int stride = THREADS_Y / 2; stride > 0; stride = stride / 2) {
    if (idy < stride) {
      sum_s[idy][idx] += sum_s[idy + stride][idx];
    }
    __syncthreads();
  }
  __syncthreads();

  if (weightIdx < weightDim) {
    if (idy == 0) {
      weightGrad[padId * weightDim + weightIdx] += sum_s[0][idx];
    }
  }
}

void hl_context_projection_backward_weight(real* outputGrad,
                                           const int* sequence,
                                           real* weightGrad,
                                           int numSequences,
                                           int weightDim,
                                           int totalPad,
                                           int contextLength,
                                           int contextStart,
                                           int beginPad) {
  CHECK_NOTNULL(outputGrad);
  CHECK_NOTNULL(sequence);
  CHECK_NOTNULL(weightGrad);

  int threadsX = 32;
  int threadsY = 32;
  int blocksX = totalPad * ((weightDim + threadsX - 1) / threadsX);
  dim3 threads(threadsX, threadsY);
  dim3 grid(blocksX, 1);

  KeContextProjectionBackwardWeight<32, 32>
      <<<grid, threads, 0, STREAM_DEFAULT>>>(outputGrad, sequence, weightGrad,
                                             numSequences, weightDim,
                                             contextLength, contextStart, beginPad);
  CHECK_SYNC("hl_context_projection_backward_weight failed");
}

template <int blockDimX, int blockDimY, int gridDimX, bool AddRow>
__global__ void KeMatrixAddRows(real* output,
                                real* table,
...
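In these kernels each block handles one sequence, and a fixed block of 128 threads covers an arbitrary inputDim by re-running the body with idx advanced by blockSize (the outer k loop). A host-side illustration of the same column-striding pattern, not taken from the commit:

// Each logical thread tid visits columns tid, tid + blockSize, tid + 2*blockSize, ...
// which is exactly how KeContextProjection* covers inputDim with a fixed block size.
void forEachColumn(int inputDim, int blockSize, void (*visit)(int col)) {
  for (int tid = 0; tid < blockSize; ++tid) {
    for (int col = tid; col < inputDim; col += blockSize) {
      visit(col);
    }
  }
}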
paddle/function/CMakeLists.txt  (+3 −0)

@@ -17,6 +17,9 @@ if(WITH_TESTING)
    # file(GLOB test_files . *OpTest.cpp)
    # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
    add_simple_unittest(CrossMapNormalOpTest)
    add_unittest(ContextProjectionOpTest
        ContextProjectionOpTest.cpp
        ../gserver/tests/TestUtil.cpp)
endif()
endif()
...
paddle/function/ContextProjectionOp.cpp  (new file, mode 100644, +373 −0)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ContextProjectionOp.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
namespace paddle {

template <>
void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix* out_mat,
                                               const CpuMatrix* input_mat,
                                               const CpuMatrix* weight_mat,
                                               const CpuIVector& seq_vec,
                                               size_t context_length,
                                               int context_start,
                                               size_t begin_pad) {
  const int* starts = seq_vec.getData();
  const size_t num_sequences = seq_vec.getSize() - 1;
  auto w_mat = const_cast<CpuMatrix*>(weight_mat);
  auto in_mat = const_cast<CpuMatrix*>(input_mat);
  for (size_t i = 0; i < num_sequences; ++i) {
    for (size_t j = 0; j < context_length; ++j) {
      int begin = starts[i] + context_start + j;
      int end = starts[i + 1] + context_start + j;
      int dst_begin = starts[i];
      int dst_end = starts[i + 1];
      if (begin < starts[i]) {
        int64_t pad_size = std::min(starts[i] - begin, starts[i + 1] - starts[i]);
        MatrixPtr mat = out_mat->subMatrix(starts[i], pad_size);
        if (w_mat) {
          MatrixPtr sub = w_mat->subMatrix(j, pad_size);
          mat->addAtOffset(*sub, j * in_mat->getWidth());
        }
        dst_begin = starts[i] + pad_size;
        begin = starts[i];
      }
      if (end > starts[i + 1]) {
        int64_t pad_size = std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
        MatrixPtr mat = out_mat->subMatrix(starts[i + 1] - pad_size, pad_size);
        if (w_mat) {
          MatrixPtr sub = w_mat->subMatrix(begin_pad + context_start + j - pad_size, pad_size);
          mat->addAtOffset(*sub, j * in_mat->getWidth());
        }
        dst_end = starts[i + 1] - pad_size;
        end = starts[i + 1];
      }
      if (end <= begin) continue;
      MatrixPtr src = in_mat->subMatrix(begin, end - begin);
      MatrixPtr dst = out_mat->subMatrix(dst_begin, dst_end - dst_begin);
      dst->addAtOffset(*src, j * in_mat->getWidth());
    }
  }
}

/**
 * \param inputs[0] input value.
 * \param inputs[1] input weight.
 * \param inputs[2] input sequence.
 * \param outputs[0] output value.
 */
template <DeviceType Device>
class ContextProjectionForwardFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {
    context_length_ = config.get<size_t>("context_length");
    context_start_ = config.get<int>("context_start");
    begin_pad_ = config.get<size_t>("begin_pad");
  }

  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(3, inputs.size());
    CHECK_EQ(1, outputs.size());
    CHECK_EQ(0, inouts.size());

    CHECK(outputs[0].getData() && inputs[0].getData() && inputs[2].getData());
    CHECK_EQ(outputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[1].dims_.size(), 2);
    CHECK_EQ(inputs[2].dims_.size(), 1);
    /// dim of output = dim of input * context_length
    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
    /// dim of input == dim of weight
    CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]);
    /// input and output has the same batch_size
    CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);

    auto out_mat = std::make_shared<typename MatrixT<Device>::type>(
        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
    const auto in_mat = std::make_shared<typename MatrixT<Device>::type>(
        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
    const auto w_mat =
        !inputs[1].getData()
            ? nullptr
            : std::make_shared<typename MatrixT<Device>::type>(
                  inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
    typename SequenceT<Device>::type seq_vec(
        inputs[2].dims_[0], reinterpret_cast<int*>(inputs[2].getData()));

    ContextProjectionForward<Device>(out_mat.get(), in_mat.get(), w_mat.get(),
                                     seq_vec, context_length_, context_start_,
                                     begin_pad_);
  }

private:
  size_t context_length_;
  int context_start_;
  size_t begin_pad_;
};

template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix* out_grad_mat,
                                                CpuMatrix* in_grad_mat,
                                                CpuMatrix* w_grad_mat,
                                                const CpuIVector& seq_vec,
                                                size_t context_length,
                                                int context_start,
                                                size_t begin_pad,
                                                bool is_padding,
                                                size_t total_pad) {
  CHECK(out_grad_mat);
  size_t input_dim = in_grad_mat ? in_grad_mat->getWidth()
                                 : w_grad_mat ? w_grad_mat->getWidth() : 0;
  const int* starts = seq_vec.getData();
  size_t num_sequences = seq_vec.getSize() - 1;
  for (size_t i = 0; i < num_sequences; ++i) {
    for (size_t j = 0; j < context_length; ++j) {
      int begin = starts[i] + context_start + j;
      int end = starts[i + 1] + context_start + j;
      int dst_begin = starts[i];
      int dst_end = starts[i + 1];
      if (begin < starts[i]) {
        int64_t pad_size = std::min(starts[i] - begin, starts[i + 1] - starts[i]);
        if (is_padding && w_grad_mat) {
          MatrixPtr mat = out_grad_mat->subMatrix(starts[i], pad_size);
          MatrixPtr sub = w_grad_mat->subMatrix(j, pad_size);
          sub->addAtOffset(*mat, j * input_dim);
        }
        dst_begin = starts[i] + pad_size;
        begin = starts[i];
      }
      if (end > starts[i + 1]) {
        int64_t pad_size = std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
        if (is_padding && w_grad_mat) {
          MatrixPtr mat = out_grad_mat->subMatrix(starts[i + 1] - pad_size, pad_size);
          MatrixPtr sub = w_grad_mat->subMatrix(begin_pad + context_start + j - pad_size, pad_size);
          sub->addAtOffset(*mat, j * input_dim);
        }
        dst_end = starts[i + 1] - pad_size;
        end = starts[i + 1];
      }
      if (end <= begin) continue;
      if (!in_grad_mat) continue;
      MatrixPtr src = in_grad_mat->subMatrix(begin, end - begin);
      MatrixPtr dst = out_grad_mat->subMatrix(dst_begin, dst_end - dst_begin);
      src->addAtOffset(*dst, j * input_dim);
    }
  }
}

/**
 * \param inputs[0] input grad.
 * \param inputs[1] weight grad.
 * \param inputs[2] input sequence.
 * \param outputs[0] output value.
 */
template <DeviceType Device>
class ContextProjectionBackwardFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {
    context_length_ = config.get<size_t>("context_length");
    context_start_ = config.get<int>("context_start");
    begin_pad_ = config.get<size_t>("begin_pad");
    is_padding_ = config.get<bool>("is_padding");
    total_pad_ = config.get<size_t>("total_pad");
  }

  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(3, inputs.size());
    CHECK_EQ(1, outputs.size());
    CHECK_EQ(0, inouts.size());

    CHECK(outputs[0].getData() && inputs[2].getData());
    CHECK_EQ(outputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[1].dims_.size(), 2);
    CHECK_EQ(inputs[2].dims_.size(), 1);

    /// dim of input == dim of weight
    CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]);
    /// input and output has the same batch_size
    CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
    /// dim of output = dim of input * context_length
    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);

    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
    auto in_grad_mat =
        !inputs[0].getData()
            ? nullptr
            : std::make_shared<typename MatrixT<Device>::type>(
                  inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
    auto w_grad_mat =
        !inputs[1].getData()
            ? nullptr
            : std::make_shared<typename MatrixT<Device>::type>(
                  inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
    typename SequenceT<Device>::type seq_vec(
        inputs[2].dims_[0], reinterpret_cast<int*>(inputs[2].getData()));

    ContextProjectionBackward<Device>(out_grad_mat.get(),
                                      in_grad_mat ? in_grad_mat.get() : nullptr,
                                      w_grad_mat ? w_grad_mat.get() : nullptr,
                                      seq_vec, context_length_, context_start_,
                                      begin_pad_, is_padding_, total_pad_);
  }

private:
  size_t context_length_;
  int context_start_;
  size_t begin_pad_;
  bool is_padding_;
  size_t total_pad_;
};

/**
 * \param inputs[0] input grad.
 * \param inputs[1] input sequence.
 * \param outputs[0] output grad.
 */
template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {
    context_length_ = config.get<size_t>("context_length");
    context_start_ = config.get<int>("context_start");
  }

  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(2, inputs.size());
    CHECK_EQ(1, outputs.size());
    CHECK_EQ(0, inouts.size());
    CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
    CHECK_EQ(outputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[1].dims_.size(), 1);
    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
    /// input and output has the same batch_size
    CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);

    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
    const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
    typename SequenceT<Device>::type seq_vec(
        inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));

    ContextProjectionBackwardData<Device>(out_grad_mat.get(), in_grad_mat.get(),
                                          seq_vec, context_length_, context_start_);
  }

private:
  size_t context_length_;
  int context_start_;
};

/**
 * \param inputs[0] weight grad.
 * \param inputs[1] input sequence.
 * \param outputs[0] output grad.
 */
template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {
    context_length_ = config.get<size_t>("context_length");
    context_start_ = config.get<int>("context_start");
    begin_pad_ = config.get<size_t>("begin_pad");
    total_pad_ = config.get<size_t>("total_pad");
  }

  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(2, inputs.size());
    CHECK_EQ(1, outputs.size());
    CHECK_EQ(0, inouts.size());
    CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
    CHECK_EQ(outputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[0].dims_.size(), 2);
    CHECK_EQ(inputs[1].dims_.size(), 1);
    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);

    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
    auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
    typename SequenceT<Device>::type seq_vec(
        inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));

    ContextProjectionBackwardWeight<Device>(out_grad_mat.get(), w_grad_mat.get(),
                                            seq_vec, context_length_, context_start_,
                                            total_pad_, begin_pad_);
  }

private:
  size_t context_length_;
  int context_start_;
  size_t begin_pad_;
  size_t total_pad_;
};

REGISTER_TYPED_FUNC(ContextProjectionForward, CPU, ContextProjectionForwardFunc);
REGISTER_TYPED_FUNC(ContextProjectionBackward, CPU, ContextProjectionBackwardFunc);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(ContextProjectionForward, GPU, ContextProjectionForwardFunc);
REGISTER_TYPED_FUNC(ContextProjectionBackward, GPU, ContextProjectionBackwardFunc);
REGISTER_TYPED_FUNC(ContextProjectionBackwardData, GPU, ContextProjectionBackwardDataFunc);
REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight, GPU, ContextProjectionBackwardWeightFunc);
#endif
}  // namespace paddle
paddle/function/ContextProjectionOp.h  (new file, mode 100644, +85 −0)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {

/**
 * \brief   Context Projection Forward.
 *
 * \param[out]  outputs         output data.
 * \param[in]   input           input data.
 * \param[in]   weight          input weight.
 * \param[in]   sequence        input data.
 * \param[in]   context_length  consecutive rows for concatenation.
 * \param[in]   context_start   context start position.
 * \param[in]   begin_pad       begining pad position.
 * \param[in]   is_padding      whether padding 0 or not.
 *
 */
template <DeviceType Device>
void ContextProjectionForward(typename MatrixT<Device>::type* output,
                              const typename MatrixT<Device>::type* input,
                              const typename MatrixT<Device>::type* weight,
                              const typename SequenceT<Device>::type& sequence,
                              size_t context_length,
                              int context_start,
                              size_t begin_pad);

/**
 * \brief   Context Projection Backward.
 *
 * \param[out]  outputs         output gradient.
 * \param[in]   input           input gradient.
 * \param[in]   weight          input weight gradient.
 * \param[in]   sequence        input data.
 * \param[in]   context_length  consecutive rows for concatenation.
 * \param[in]   context_start   context start position.
 * \param[in]   begin_pad       begining pad position.
 * \param[in]   is_padding      whether padding 0 or not.
 *
 */
template <DeviceType Device>
void ContextProjectionBackward(typename MatrixT<Device>::type* out_grad,
                               typename MatrixT<Device>::type* in_grad,
                               typename MatrixT<Device>::type* w_grad,
                               const typename SequenceT<Device>::type& seq_vec,
                               size_t context_length,
                               int context_start,
                               size_t begin_pad,
                               bool is_padding,
                               size_t total_pad);

template <DeviceType Device>
void ContextProjectionBackwardData(typename MatrixT<Device>::type* out_grad,
                                   typename MatrixT<Device>::type* in_grad,
                                   const typename SequenceT<Device>::type& sequence,
                                   size_t context_length,
                                   int context_start);

template <DeviceType Device>
void ContextProjectionBackwardWeight(typename MatrixT<Device>::type* out_grad,
                                     typename MatrixT<Device>::type* w_grad,
                                     const typename SequenceT<Device>::type& seq_vec,
                                     size_t context_length,
                                     int context_start,
                                     size_t total_pad,
                                     size_t begin_pad);

}  // namespace paddle
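The shape relationships these declarations imply, following the unit test below: the output width is input width times context_length, begin_pad is the number of padding rows before the sequence, and total_pad additionally counts the rows needed past the end. A small helper sketch (names are illustrative):

#include <algorithm>

// E.g. context_start = -1, context_length = 3  =>  begin_pad = 1, total_pad = 2,
// and an input of width input_dim yields an output of width 3 * input_dim.
size_t beginPadOf(int context_start) { return std::max(0, -context_start); }

size_t totalPadOf(int context_start, size_t context_length) {
  return std::max(0, -context_start) +
         std::max(0, context_start + static_cast<int>(context_length) - 1);
}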
paddle/function/ContextProjectionOpGpu.cu  (new file, mode 100644, +401 −0)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "ContextProjectionOp.h"
namespace paddle {

template <bool padding>
__global__ void KeContextProjectionForward(const real* input,
                                           const int* sequence,
                                           const real* weight,
                                           real* output,
                                           int input_dim,
                                           int context_length,
                                           int context_start,
                                           int begin_pad) {
  int idx = threadIdx.x;
  int block_size = blockDim.x;
  int sequenceId = blockIdx.x;
  int seq_start = sequence[sequenceId];
  int seq_end = sequence[sequenceId + 1];
  real value = 0;

  int instances = seq_end - seq_start + context_length - 1;
  output += seq_start * input_dim * context_length;
  input += seq_start * input_dim;
  for (int k = 0; k <= input_dim / block_size; k++) {
    if (idx < input_dim) {
      for (int i = 0; i < instances; i++) {
        // i + context_start;
        if ((i + context_start) < 0) {
          if (padding) {
            value = weight[i * input_dim + idx];
          } else {
            continue;
          }
        } else if ((i + context_start) >= (seq_end - seq_start)) {
          if (padding) {
            value = weight[(begin_pad + i + context_start - (seq_end - seq_start)) * input_dim + idx];
          } else {
            continue;
          }
        } else {
          value = input[(i + context_start) * input_dim + idx];
        }

        int outx = (i - context_length) < 0 ? i : (context_length - 1);
        int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
        real* output_r = output + outy * input_dim * context_length + outx * input_dim;
        for (int j = outy; j < seq_end - seq_start; j++) {
          output_r[idx] += value;
          if (j - outy == outx) break;
          output_r += (context_length - 1) * input_dim;
        }
      }
    }
    idx += block_size;
  }
}

/**
 * @brief   Context projection forward.
 *
 * @param[in]   input           input sequence.
 * @param[in]   sequence        sequence index.
 * @param[in]   weight          padding data.
 * @param[out]  output          output sequence.
 * @param[in]   num_sequences   number of sequences.
 * @param[in]   input_dim       input sequence dimension.
 * @param[in]   context_length  context length.
 * @param[in]   context_start   context start.
 * @param[in]   begin_pad       number of extra timesteps added at the beginning.
 *
 */
void hl_context_projection_forward(const real* input,
                                   const int* sequence,
                                   const real* weight,
                                   real* output,
                                   size_t num_sequences,
                                   size_t input_dim,
                                   size_t context_length,
                                   int context_start,
                                   size_t begin_pad) {
  CHECK_NOTNULL(input);
  CHECK_NOTNULL(sequence);
  CHECK_NOTNULL(output);

  int block_size = 128;
  int blocks_x = num_sequences;
  int blocks_y = 1;
  dim3 threads(block_size, 1);
  dim3 grid(blocks_x, blocks_y);

  if (weight) {
    KeContextProjectionForward<true><<<grid, threads, 0, STREAM_DEFAULT>>>(
        input, sequence, weight, output, input_dim,
        context_length, context_start, begin_pad);
  } else {
    KeContextProjectionForward<false><<<grid, threads, 0, STREAM_DEFAULT>>>(
        input, sequence, weight, output, input_dim,
        context_length, context_start, begin_pad);
  }
  CHECK_SYNC("hl_context_projection_forward failed");
}

template <>
void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix* output,
                                               const GpuMatrix* input,
                                               const GpuMatrix* weight,
                                               const GpuIVector& sequence,
                                               size_t context_length,
                                               int context_start,
                                               size_t begin_pad) {
  CHECK(input && output);
  hl_context_projection_forward(input->getData(),
                                sequence.getData(),
                                weight ? weight->getData() : nullptr,
                                output->getData(),
                                sequence.getSize() - 1,
                                input->getWidth(),
                                context_length,
                                context_start,
                                begin_pad);
}

__global__ void KeContextProjectionBackwardData(real* out_grad,
                                                const int* sequence,
                                                real* in_grad,
                                                int input_dim,
                                                int context_length,
                                                int context_start) {
  int idx = threadIdx.x;
  int block_size = blockDim.x;
  int sequenceId = blockIdx.x;
  int seq_start = sequence[sequenceId];
  int seq_end = sequence[sequenceId + 1];
  real value = 0;

  int instances = seq_end - seq_start + context_length - 1;
  out_grad += seq_start * input_dim * context_length;
  in_grad += seq_start * input_dim;
  for (int k = 0; k <= input_dim / block_size; k++) {
    if (idx < input_dim) {
      for (int i = 0; i < instances; i++) {
        if ((i + context_start) < 0) {
          continue;
        } else if ((i + context_start) >= (seq_end - seq_start)) {
          continue;
        } else {
          // value = 0;
          value = in_grad[(i + context_start) * input_dim + idx];
        }

        int outx = (i - context_length) < 0 ? i : (context_length - 1);
        int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
        real* output_r = out_grad + outy * input_dim * context_length + outx * input_dim;
        for (int j = outy; j < seq_end - seq_start; j++) {
          value += output_r[idx];
          if (j - outy == outx) break;
          output_r += (context_length - 1) * input_dim;
        }
        in_grad[(i + context_start) * input_dim + idx] = value;
      }
    }
    idx += block_size;
  }
}

/**
 * @brief   Context projection backward data.
 *
 * @param[in]   out_grad        output gradient.
 * @param[in]   sequence        sequence index.
 * @param[out]  input_grad      input gradient.
 * @param[in]   num_sequences   number of sequences.
 * @param[in]   input_dim       input sequence dimension.
 * @param[in]   context_length  context length.
 * @param[in]   context_start   context start.
 *
 */
void hl_context_projection_backward_data(real* out_grad,
                                         const int* sequence,
                                         real* input_grad,
                                         size_t num_sequences,
                                         size_t input_dim,
                                         size_t context_length,
                                         int context_start) {
  CHECK_NOTNULL(out_grad);
  CHECK_NOTNULL(sequence);
  CHECK_NOTNULL(input_grad);

  int block_size = 128;
  int blocks_x = num_sequences;
  int blocks_y = 1;
  dim3 threads(block_size, 1);
  dim3 grid(blocks_x, blocks_y);
  KeContextProjectionBackwardData<<<grid, threads, 0, STREAM_DEFAULT>>>(
      out_grad, sequence, input_grad, input_dim, context_length, context_start);
  CHECK_SYNC("hl_context_projection_backward_data failed");
}

template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix* out_grad,
                                                    GpuMatrix* in_grad,
                                                    const GpuIVector& sequence,
                                                    size_t context_length,
                                                    int context_start) {
  CHECK(in_grad && out_grad);
  hl_context_projection_backward_data(out_grad->getData(),
                                      sequence.getData(),
                                      in_grad->getData(),
                                      sequence.getSize() - 1,
                                      in_grad->getWidth(),
                                      context_length,
                                      context_start);
}

template <int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad,
                                                  const int* sequence,
                                                  real* w_grad,
                                                  int num_sequences,
                                                  int w_dim,
                                                  int context_length,
                                                  int context_start,
                                                  int begin_pad) {
  __shared__ real sum_s[THREADS_Y][THREADS_X];
  int pad_of_block = (w_dim + THREADS_X - 1) / THREADS_X;
  const int idx = threadIdx.x;
  const int idy = threadIdx.y;
  int padId = blockIdx.x / pad_of_block;
  int weight_idx = idx + THREADS_X * (blockIdx.x % pad_of_block);
  int instanceId;
  real value = 0;
  real* output_r;

  sum_s[idy][idx] = 0.0f;
  if (weight_idx < w_dim) {
    for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
      int seq_start = sequence[seqId];
      int seq_end = sequence[seqId + 1];
      output_r = out_grad + seq_start * w_dim * context_length;

      if (context_start < 0) {
        if (padId + context_start < 0) {
          instanceId = padId;
        } else {
          // begin_pad > 0;
          instanceId = (padId - begin_pad) + (seq_end - seq_start) - context_start;
        }
      } else {
        if (padId + (seq_end - seq_start) < context_start) {
          continue;
        } else {
          // begin_pad == 0;
          instanceId = padId + (seq_end - seq_start) - context_start;
        }
      }

      int outx = (instanceId - context_length) < 0 ? instanceId : (context_length - 1);
      int outy = (instanceId - context_length) < 0 ? 0 : (instanceId - (context_length - 1));
      output_r += outy * w_dim * context_length + outx * w_dim;
      for (int j = outy; j < seq_end - seq_start; j++) {
        value += output_r[weight_idx];
        if (j - outy == outx) break;
        output_r += (context_length - 1) * w_dim;
      }
    }
    sum_s[idy][idx] = value;
  }
  __syncthreads();

  for (int stride = THREADS_Y / 2; stride > 0; stride = stride / 2) {
    if (idy < stride) {
      sum_s[idy][idx] += sum_s[idy + stride][idx];
    }
    __syncthreads();
  }
  __syncthreads();

  if (weight_idx < w_dim) {
    if (idy == 0) {
      w_grad[padId * w_dim + weight_idx] += sum_s[0][idx];
    }
  }
}

/**
 * @brief   Context projection backward weight.
 *
 * @param[in]   out_grad        output gradient.
 * @param[in]   sequence        sequence index.
 * @param[out]  w_grad          weight gradient.
 * @param[in]   num_sequences   number of sequences.
 * @param[in]   w_dim           input sequence dimension.
 * @param[in]   total_pad       number of extra timesteps.
 * @param[in]   context_length  context length.
 * @param[in]   context_start   context start.
 * @param[in]   begin_pad       number of extra timesteps added at the beginning.
 *
 */
void hl_context_projection_backward_weight(real* out_grad,
                                           const int* sequence,
                                           real* w_grad,
                                           size_t num_sequences,
                                           size_t w_dim,
                                           size_t total_pad,
                                           size_t context_length,
                                           int context_start,
                                           size_t begin_pad) {
  CHECK_NOTNULL(out_grad);
  CHECK_NOTNULL(sequence);
  CHECK_NOTNULL(w_grad);

  int threads_x = 32;
  int threads_y = 32;
  int blocks_x = total_pad * ((w_dim + threads_x - 1) / threads_x);
  dim3 threads(threads_x, threads_y);
  dim3 grid(blocks_x, 1);

  KeContextProjectionBackwardWeight<32, 32>
      <<<grid, threads, 0, STREAM_DEFAULT>>>(out_grad, sequence, w_grad,
                                             num_sequences, w_dim,
                                             context_length, context_start, begin_pad);
  CHECK_SYNC("hl_context_projection_backward_weight failed");
}

template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(GpuMatrix* out_grad,
                                                      GpuMatrix* w_grad,
                                                      const GpuIVector& seq_vec,
                                                      size_t context_length,
                                                      int context_start,
                                                      size_t total_pad,
                                                      size_t begin_pad) {
  CHECK(out_grad && w_grad);
  hl_context_projection_backward_weight(out_grad->getData(),
                                        seq_vec.getData(),
                                        w_grad->getData(),
                                        seq_vec.getSize() - 1,
                                        w_grad->getWidth(),
                                        total_pad,
                                        context_length,
                                        context_start,
                                        begin_pad);
}

template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix* out_grad,
                                                GpuMatrix* in_grad,
                                                GpuMatrix* w_grad,
                                                const GpuIVector& sequence,
                                                size_t context_length,
                                                int context_start,
                                                size_t begin_pad,
                                                bool is_padding,
                                                size_t total_pad) {
  CHECK(out_grad);
  if (in_grad) {
    ContextProjectionBackwardData<DEVICE_TYPE_GPU>(
        out_grad, in_grad, sequence, context_length, context_start);
  }
  if (is_padding && w_grad) {
    ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
        out_grad, w_grad, sequence, context_length, context_start, total_pad, begin_pad);
  }
}

}  // namespace paddle
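The backward-weight kernel accumulates each thread row's partial sums in shared memory and then combines them with a stride-halving reduction: after log2(THREADS_Y) rounds, row 0 holds the total for each column. A serial C++ equivalent of that reduction, for illustration only:

// Mirrors the sum_s reduction in KeContextProjectionBackwardWeight for one column.
// After the loop, sum_s[0][col] contains the sum of sum_s[0..threads_y-1][col].
void reduceRowsSerial(float sum_s[][32], int threads_y, int col) {
  for (int stride = threads_y / 2; stride > 0; stride /= 2) {
    for (int idy = 0; idy < stride; ++idy) {
      sum_s[idy][col] += sum_s[idy + stride][col];
    }
  }
}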
paddle/function/ContextProjectionOpTest.cpp  (new file, mode 100644, +172 −0)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
#include "paddle/gserver/tests/TestUtil.h"
#include "paddle/math/Matrix.h"
using namespace paddle;  // NOLINT

void testMatrixProjectionForward(int context_start,
                                 size_t context_length,
                                 bool is_padding,
                                 size_t batch_size,
                                 size_t input_dim) {
  size_t pad = std::max(0, -context_start) +
               std::max(0, (int)(context_start + context_length - 1));
  if (pad == 0) is_padding = false;

  FunctionCompare compare("ContextProjectionForward",
                          FuncConfig()
                              .set("context_length", context_length)
                              .set("context_start", context_start)
                              .set("begin_pad", std::max(0, -context_start)));

  CpuMatrix cpu_in(batch_size, input_dim);
  cpu_in.randomizeUniform();
  GpuMatrix gpu_in(batch_size, input_dim);
  gpu_in.copyFrom(cpu_in);
  auto cpu_weight =
      is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
  auto gpu_weight =
      is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
  if (is_padding) {
    cpu_weight->randomizeUniform();
    gpu_weight->copyFrom(*cpu_weight);
  }
  IVectorPtr cpu_seq;
  generateSequenceStartPositions(batch_size, cpu_seq);
  IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
  gpu_seq->copyFrom(*cpu_seq);

  CpuMatrix cpu_out(batch_size, input_dim * context_length);
  GpuMatrix gpu_out(batch_size, input_dim * context_length);
  cpu_out.randomizeUniform();
  gpu_out.copyFrom(cpu_out);

  compare.getCpuFunction()->calc(
      {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
       Tensor(cpu_weight ? cpu_weight->getData() : nullptr, Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(cpu_seq->getData()), Dims{cpu_seq->getSize()})},
      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
      {});
  compare.getGpuFunction()->calc(
      {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
       Tensor(gpu_weight ? gpu_weight->getData() : nullptr, Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(gpu_seq->getData()), Dims{gpu_seq->getSize()})},
      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
      {});

  autotest::TensorCheckEqual(cpu_out, gpu_out);
}

void testMatrixProjectionBackward(int context_start,
                                  int context_length,
                                  bool is_padding,
                                  size_t batch_size,
                                  size_t input_dim) {
  size_t pad = std::max(0, -context_start) +
               std::max(0, (int)(context_start + context_length - 1));
  if (pad == 0) is_padding = false;

  FunctionCompare compare("ContextProjectionBackward",
                          FuncConfig()
                              .set("context_length", context_length)
                              .set("context_start", context_start)
                              .set("begin_pad", std::max(0, -context_start))
                              .set("is_padding", is_padding)
                              .set("total_pad", pad));

  CpuMatrix cpu_in_grad(batch_size, input_dim);
  cpu_in_grad.randomizeUniform();
  GpuMatrix gpu_in_grad(batch_size, input_dim);
  gpu_in_grad.copyFrom(cpu_in_grad);

  CpuMatrix cpu_out_grad(batch_size, input_dim * context_length);
  cpu_out_grad.randomizeUniform();
  GpuMatrix gpu_out_grad(batch_size, input_dim * context_length);
  gpu_out_grad.copyFrom(cpu_out_grad);

  IVectorPtr cpu_seq;
  generateSequenceStartPositions(batch_size, cpu_seq);
  IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
  gpu_seq->copyFrom(*cpu_seq);

  auto cpu_w_grad =
      is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
  auto gpu_w_grad =
      is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
  if (is_padding) {
    cpu_w_grad->randomizeUniform();
    gpu_w_grad->copyFrom(*cpu_w_grad);
  }

  compare.getCpuFunction()->calc(
      {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(cpu_seq->getData()), Dims{cpu_seq->getSize()})},
      {Tensor(cpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
      {});
  compare.getGpuFunction()->calc(
      {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(gpu_seq->getData()), Dims{gpu_seq->getSize()})},
      {Tensor(gpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
      {});

  autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
  if (is_padding) {
    autotest::TensorCheckErr(*cpu_w_grad, *gpu_w_grad);
  }
}

TEST(ContextProjection, projection) {
  for (auto context_start : {-5, -3, -1, 0, 3}) {
    for (auto context_length : {1, 2, 5, 7}) {
      for (auto trainable_padding : {false, true}) {
        for (auto batch_size : {1, 2, 5, 20, 100}) {
          for (auto input_dim : {15, 32, 63, 128, 200}) {
            VLOG(3) << " context_start=" << context_start
                    << " context_length=" << context_length
                    << " trainable_padding=" << trainable_padding
                    << " batch_size=" << batch_size
                    << " input_dim=" << input_dim;
            testMatrixProjectionForward(context_start, context_length,
                                        trainable_padding, batch_size, input_dim);
            testMatrixProjectionBackward(context_start, context_length,
                                         trainable_padding, batch_size, input_dim);
          }
        }
      }
    }
  }
}
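If one wanted to exercise a single configuration instead of the full parameter sweep, a test in the same style might look like the following. This is illustrative only and not part of the commit; it reuses the helper functions defined above.

TEST(ContextProjection, singleConfigSmoke) {
  // One fixed configuration: negative start, trainable padding enabled.
  const int context_start = -1;
  const size_t context_length = 3;
  testMatrixProjectionForward(context_start, context_length,
                              /* is_padding */ true,
                              /* batch_size */ 8, /* input_dim */ 16);
  testMatrixProjectionBackward(context_start, context_length,
                               /* is_padding */ true,
                               /* batch_size */ 8, /* input_dim */ 16);
}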
paddle/function/Function.cpp  (+28 −0)

@@ -30,6 +30,20 @@ real FuncConfig::get<real>(const std::string& key) const {
  return it->second.r;
}

template <>
int FuncConfig::get<int>(const std::string& key) const {
  auto it = valueMap_.find(key);
  CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
  return it->second.i;
}

template <>
bool FuncConfig::get<bool>(const std::string& key) const {
  auto it = valueMap_.find(key);
  CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
  return it->second.b;
}

template <>
FuncConfig& FuncConfig::set<size_t>(const std::string& key, size_t v) {
  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
...

@@ -44,6 +58,20 @@ FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
  return *this;
}

template <>
FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
  valueMap_[key].i = v;
  return *this;
}

template <>
FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
  valueMap_[key].b = v;
  return *this;
}

ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;

}  // namespace paddle
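These new int and bool specializations are what let a FuncConfig carry context_start and is_padding alongside the existing size_t and real entries. A small usage sketch (values are illustrative):

// Chained set<> calls store typed values; get<> reads them back by key.
void funcConfigExample() {
  FuncConfig config;
  config.set("context_length", (size_t)3)
        .set("context_start", -1)       // stored via set<int>
        .set("is_padding", true);       // stored via set<bool>
  int start = config.get<int>("context_start");
  bool padding = config.get<bool>("is_padding");
  (void)start;
  (void)padding;
}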
paddle/function/Function.h  (+15 −0)

@@ -40,6 +40,19 @@ struct MatrixT<DEVICE_TYPE_GPU> {
  using type = GpuMatrix;
};

template <DeviceType Device>
struct SequenceT;

template <>
struct SequenceT<DEVICE_TYPE_CPU> {
  using type = CpuIVector;
};

template <>
struct SequenceT<DEVICE_TYPE_GPU> {
  using type = GpuIVector;
};

typedef std::vector<size_t> Dims;

class Tensor {
...

@@ -59,6 +72,8 @@ public:
  union value {
    size_t s;
    real r;
    int i;
    bool b;
  };

  template <typename T>
...
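The SequenceT trait mirrors MatrixT: it maps a DeviceType to the concrete integer-vector type, so device-templated code such as ContextProjectionForward<Device> can wrap raw buffers without branching on the device. A short sketch, using only the constructors that appear in this commit:

// MatrixT<DEVICE_TYPE_CPU>::type == CpuMatrix, SequenceT<DEVICE_TYPE_GPU>::type == GpuIVector.
template <DeviceType Device>
void wrapBuffers(real* mat_data, int* seq_starts, size_t height, size_t width) {
  typename MatrixT<Device>::type mat(mat_data, height, width);        // non-owning matrix view
  typename SequenceT<Device>::type seq(height + 1, seq_starts);       // sequence start positions
  (void)mat;
  (void)seq;
}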
paddle/function/FunctionTest.h  (+28 −16; removed lines marked '-', added lines '+')

@@ -33,25 +33,33 @@ public:
     // init cpu and gpu arguments
     auto initArgs = [=](
         Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
-      for (auto arg : inArgs) {
+      for (const auto arg : inArgs) {
         size_t size = sizeof(real);
-        for (auto dim : arg.dims_) {
+        for (const auto dim : arg.dims_) {
           size *= dim;
         }
-        cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-        gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-        cpuArgs.emplace_back(Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
-        gpuArgs.emplace_back(Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
-        // will use an api to refactor this code.
-        CpuVector cpuVector(size / sizeof(real), (real*)cpuArgs.back().getData());
-        GpuVector gpuVector(size / sizeof(real), (real*)gpuArgs.back().getData());
-        cpuVector.uniform(0.001, 1);
-        gpuVector.copyFrom(cpuVector);
+        if (arg.getData()) {
+          // todo(tianbing), waste unnecessary mem here
+          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+          cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
+          gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
+          // already init outside
+        } else {
+          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+          cpuArgs.emplace_back(Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
+          gpuArgs.emplace_back(Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
+          // will use an api to refactor this code.
+          CpuVector cpuVector(size / sizeof(real), (real*)cpuArgs.back().getData());
+          GpuVector gpuVector(size / sizeof(real), (real*)gpuArgs.back().getData());
+          cpuVector.uniform(0.001, 1);
+          gpuVector.copyFrom(cpuVector);
+        }
       }
     };
     initArgs(cpuInputs, gpuInputs, inputs);
...

@@ -81,6 +89,10 @@ public:
     checkArgs(cpuInouts, gpuInouts);
   }

+  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; }
+
+  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpu; }
+
 protected:
   std::shared_ptr<FunctionBase> cpu;
   std::shared_ptr<FunctionBase> gpu;
...
paddle/gserver/layers/ContextProjection.cpp  (+64 −48; removed lines marked '-', added lines '+')

@@ -38,6 +38,32 @@ ContextProjection::ContextProjection(const ProjectionConfig& config,
     CHECK_EQ(inputDim * totalPad, parameter->getSize());
     weight_.reset(new Weight(totalPad, inputDim, parameter));
   }
+  // init forward_ and backward_ functions
+  init();
+}
+
+bool ContextProjection::init() {
+  size_t context_length = config_.context_length();
+  int context_start = config_.context_start();
+  bool is_padding = config_.trainable_padding();
+  size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
+
+  createFunction(forward_,
+                 "ContextProjectionForward",
+                 FuncConfig()
+                     .set("context_length", context_length)
+                     .set("context_start", context_start)
+                     .set("begin_pad", beginPad_));
+  createFunction(backward_,
+                 "ContextProjectionBackward",
+                 FuncConfig()
+                     .set("context_length", context_length)
+                     .set("context_start", context_start)
+                     .set("begin_pad", beginPad_)
+                     .set("is_padding", is_padding)
+                     .set("total_pad", total_pad));
+
+  return true;
 }

 void ContextProjection::resetState() {
...

@@ -78,25 +104,29 @@ LayerStatePtr ContextProjection::getState() {
 }

 void ContextProjection::forward() {
-  CHECK(in_->value);
+  CHECK(in_->value && out_->value);
   CHECK(in_->sequenceStartPositions);
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(forward_.size(), 1) << "Only one forward function here";

   REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
-  out_->value->contextProjectionForward(
-      *(in_->value),
-      state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr,
-      *startPositions,
-      config_.context_length(),
-      config_.context_start(),
-      beginPad_,
-      state_ ? true : isPadding);
+  bool is_padding = config_.trainable_padding();
+  /// first use state_, otherwise use weight_(padding false === w nullptr)
+  auto w_ptr =
+      state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
+  auto start_pos = in_->sequenceStartPositions;
+  forward_[0]->calc(
+      {Tensor(in_->value->getData(), Dims{batch_size, input_dim}),
+       Tensor(w_ptr ? w_ptr->getData() : nullptr,
+              Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+       Tensor(reinterpret_cast<real*>(const_cast<int*>(start_pos->getData(useGpu_))),
              Dims{start_pos->getSize()})},
+      {Tensor(out_->value->getData(), Dims{batch_size, dim})},
+      {});

   if (state_ && config_.context_start() < 0) {
     CHECK_EQ(1, in_->getNumSequences());
...

@@ -118,41 +148,27 @@ void ContextProjection::forward() {
 }

 void ContextProjection::backward(const UpdateCallback& callback) {
-  CHECK(in_->value);
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
+  CHECK(in_->value && out_->value && out_->grad);
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());
+  CHECK_EQ(backward_.size(), 1) << "Only one backward function here";

   REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
-  if (!out_->grad->useGpu()) {
-    out_->grad->contextProjectionBackward(
-        in_->grad.get(),
-        isPadding ? weight_->getWGrad().get() : nullptr,
-        *startPositions,
-        config_.context_length(),
-        config_.context_start(),
-        beginPad_,
-        isPadding);
-  } else {
-    if (in_->grad) {
-      out_->grad->contextProjectionBackwardData(*(in_->grad),
-                                                *startPositions,
-                                                config_.context_length(),
-                                                config_.context_start());
-    }
-    if (isPadding && weight_->getWGrad()) {
-      out_->grad->contextProjectionBackwardWeight(
-          *(weight_->getWGrad()),
-          *startPositions,
-          config_.context_length(),
-          config_.context_start(),
-          weight_->getWGrad()->getHeight(),
-          beginPad_);
-    }
-  }
+  bool is_padding = config_.trainable_padding();
+  auto start_pos = in_->sequenceStartPositions;
+  auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
+  backward_[0]->calc(
+      {Tensor(in_->grad ? in_->grad->getData() : nullptr,
+              Dims{batch_size, input_dim}),
+       Tensor(w_ptr ? w_ptr->getData() : nullptr,
+              Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+       Tensor(reinterpret_cast<real*>(const_cast<int*>(start_pos->getData(useGpu_))),
              Dims{start_pos->getSize()})},
+      {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+      {});

   if (config_.trainable_padding()) {
     weight_->getParameterPtr()->incUpdate(callback);
...
浏览文件 @
be3e2764
...
...
@@ -61,6 +61,8 @@ public:
virtual
LayerStatePtr
getState
();
virtual
bool
init
();
protected:
std
::
unique_ptr
<
Weight
>
weight_
;
/// number of extra timesteps added at the beginning
...
...
paddle/gserver/layers/Projection.h  View file @ be3e2764
...
@@ -88,11 +88,37 @@ public:
    */
   virtual LayerStatePtr getState() { return nullptr; }
 
+  /**
+   * init forward_ and backward_ functions
+   */
+  virtual bool init() { return true; }
+
   /**
    * Get output size of projection.
    */
   size_t getOutputSize() const { return config_.output_size(); }
 
+protected:
+  /**
+   * Create layer function. Function is called in forward or backward.
+   * \param function, Layer::forward_ or Layer::backward_
+   * \param name, function name
+   * \param config, initialization configuration for the function
+   */
+  void createFunction(std::vector<std::shared_ptr<FunctionBase>>& function,
+                      const std::string& name,
+                      const FuncConfig& config) {
+    if (useGpu_) {
+      function.emplace_back(
+          FunctionBase::funcRegistrar_.createByType(name + "-GPU"));
+    } else {
+      function.emplace_back(
+          FunctionBase::funcRegistrar_.createByType(name + "-CPU"));
+    }
+    auto& func = function.back();
+    func->init(config);
+  }
+
 protected:
   /// Config of projection
   ProjectionConfig config_;
...
@@ -106,5 +132,9 @@ protected:
   const Argument* out_;
   /// Store `passType` passed to forward()
   PassType passType_;
+  /// Layer forward function
+  std::vector<std::shared_ptr<FunctionBase>> forward_;
+  /// Layer backward function
+  std::vector<std::shared_ptr<FunctionBase>> backward_;
 };
 
 }  // namespace paddle
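createFunction() above resolves the concrete kernel by appending a "-CPU" or "-GPU" suffix to the function name and asking the registrar for it. The sketch below reimplements that lookup with a toy registry to show the dispatch pattern in isolation; ToyRegistrar and the two stub kernel classes are assumptions for illustration only, not Paddle's actual FunctionBase/registrar machinery.

// Toy version of the "create function by type name + device suffix" pattern.
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct ToyFunctionBase {
  virtual ~ToyFunctionBase() = default;
  virtual void calc() = 0;
};

struct ToyRegistrar {
  std::map<std::string, std::function<std::unique_ptr<ToyFunctionBase>()>> factories;

  void registerType(const std::string& type,
                    std::function<std::unique_ptr<ToyFunctionBase>()> factory) {
    factories[type] = std::move(factory);
  }
  std::unique_ptr<ToyFunctionBase> createByType(const std::string& type) {
    return factories.at(type)();  // throws if the type was never registered
  }
};

struct CpuContextProjection : ToyFunctionBase {
  void calc() override { std::cout << "CPU kernel\n"; }
};
struct GpuContextProjection : ToyFunctionBase {
  void calc() override { std::cout << "GPU kernel\n"; }
};

int main() {
  ToyRegistrar registrar;
  registrar.registerType("ContextProjectionForward-CPU",
                         [] { return std::make_unique<CpuContextProjection>(); });
  registrar.registerType("ContextProjectionForward-GPU",
                         [] { return std::make_unique<GpuContextProjection>(); });

  bool useGpu = false;  // what Projection::createFunction checks via useGpu_
  auto func = registrar.createByType(std::string("ContextProjectionForward") +
                                     (useGpu ? "-GPU" : "-CPU"));
  func->calc();  // prints "CPU kernel"
  return 0;
}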
paddle/math/Matrix.cpp  View file @ be3e2764
...
@@ -1304,68 +1304,6 @@ void GpuMatrix::maxSequenceBackward(Matrix& outputGrad,
   hl_max_sequence_backward(outGrad, maxIndex, inputGrad, numSequences, dim);
 }
 
-void GpuMatrix::contextProjectionForward(Matrix& input,
-                                         Matrix* weight,
-                                         const IVector& sequence,
-                                         int contextLength,
-                                         int contextStart,
-                                         size_t beginPad,
-                                         bool isPadding) {
-  CHECK(dynamic_cast<GpuMatrix*>(&input));
-  CHECK(dynamic_cast<const GpuIVector*>(&sequence));
-  if (weight) CHECK(dynamic_cast<GpuMatrix*>(weight));
-  CHECK_EQ(getWidth(), input.getWidth() * contextLength);
-
-  hl_context_projection_forward(input.getData(),
-                                sequence.getData(),
-                                isPadding ? weight->getData() : NULL,
-                                getData(),
-                                sequence.getSize() - 1,
-                                input.getWidth(),
-                                contextLength,
-                                contextStart,
-                                beginPad,
-                                isPadding);
-}
-
-void GpuMatrix::contextProjectionBackwardData(Matrix& inputGrad,
-                                              const IVector& sequence,
-                                              int contextLength,
-                                              int contextStart) {
-  CHECK(dynamic_cast<GpuMatrix*>(&inputGrad));
-  CHECK(dynamic_cast<const GpuIVector*>(&sequence));
-  CHECK_EQ(getWidth(), inputGrad.getWidth() * contextLength);
-
-  hl_context_projection_backward_data(getData(),
-                                      sequence.getData(),
-                                      inputGrad.getData(),
-                                      sequence.getSize() - 1,
-                                      inputGrad.getWidth(),
-                                      contextLength,
-                                      contextStart);
-}
-
-void GpuMatrix::contextProjectionBackwardWeight(Matrix& weightGrad,
-                                                const IVector& sequence,
-                                                int contextLength,
-                                                int contextStart,
-                                                int totalPad,
-                                                size_t beginPad) {
-  CHECK(dynamic_cast<GpuMatrix*>(&weightGrad));
-  CHECK(dynamic_cast<const GpuIVector*>(&sequence));
-  CHECK_EQ(getWidth(), weightGrad.getWidth() * contextLength);
-
-  hl_context_projection_backward_weight(getData(),
-                                        sequence.getData(),
-                                        weightGrad.getData(),
-                                        sequence.getSize() - 1,
-                                        weightGrad.getWidth(),
-                                        totalPad,
-                                        contextLength,
-                                        contextStart,
-                                        beginPad);
-}
-
 void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) {
   CHECK(data.useGpu_ == true && W.useGpu_ == true) << "Matrix type are not equal";
...
@@ -2203,113 +2141,6 @@ void CpuMatrix::maxSequenceBackward(Matrix& outputGrad,
   }
 }
 
-void CpuMatrix::contextProjectionForward(Matrix& input,
-                                         Matrix* weight,
-                                         const IVector& sequence,
-                                         int contextLength,
-                                         int contextStart,
-                                         size_t beginPad,
-                                         bool isPadding) {
-  auto input_ptr = dynamic_cast<CpuMatrix*>(&input);
-  auto seq_ptr = dynamic_cast<const CpuIVector*>(&sequence);
-  CHECK(input_ptr && seq_ptr);
-  if (weight) CHECK(dynamic_cast<CpuMatrix*>(weight));
-  CHECK_EQ(getWidth(), input_ptr->getWidth() * contextLength);
-
-  const int* starts = seq_ptr->getData();
-  size_t numSequences = seq_ptr->getSize() - 1;
-  for (size_t i = 0; i < numSequences; ++i) {
-    for (int j = 0; j < contextLength; ++j) {
-      int begin = starts[i] + contextStart + j;
-      int end = starts[i + 1] + contextStart + j;
-      int dstBegin = starts[i];
-      int dstEnd = starts[i + 1];
-      if (begin < starts[i]) {
-        int64_t padSize =
-            std::min(starts[i] - begin, starts[i + 1] - starts[i]);
-        MatrixPtr mat = this->subMatrix(starts[i], padSize);
-        if (isPadding) {
-          MatrixPtr sub = weight->subMatrix(j, padSize);
-          mat->addAtOffset(*sub, j * input_ptr->getWidth());
-        }
-        dstBegin = starts[i] + padSize;
-        begin = starts[i];
-      }
-      if (end > starts[i + 1]) {
-        int64_t padSize =
-            std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
-        MatrixPtr mat = this->subMatrix(starts[i + 1] - padSize, padSize);
-        if (isPadding) {
-          MatrixPtr sub =
-              weight->subMatrix(beginPad + contextStart + j - padSize, padSize);
-          mat->addAtOffset(*sub, j * input_ptr->getWidth());
-        }
-        dstEnd = starts[i + 1] - padSize;
-        end = starts[i + 1];
-      }
-      if (end <= begin) continue;
-      MatrixPtr src = input_ptr->subMatrix(begin, end - begin);
-      MatrixPtr dst = this->subMatrix(dstBegin, dstEnd - dstBegin);
-      dst->addAtOffset(*src, j * input_ptr->getWidth());
-    }
-  }
-}
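The removed CpuMatrix::contextProjectionForward above, which this pull request replaces with a registered paddle function, concatenates a window of neighboring rows into column blocks of the output. A minimal flat-array restatement of that loop nest, assuming row-major buffers and ignoring the trainable-padding branch (out-of-sequence slots are simply left at zero), might look like this; contextProjectionForwardRef is a hypothetical name used only for this sketch.

// Flat-array sketch of context projection forward (no trainable padding):
// out[r, j*dim : (j+1)*dim] += in[r + context_start + j, :] for source rows
// that stay inside the same sequence; rows outside the sequence are skipped,
// whereas the real code can fill those slots from a learned padding weight.
#include <iostream>
#include <vector>

void contextProjectionForwardRef(const std::vector<float>& in,
                                 std::vector<float>& out,
                                 const std::vector<int>& starts,
                                 int input_dim,
                                 int context_start,
                                 int context_length) {
  for (size_t i = 0; i + 1 < starts.size(); ++i) {
    for (int j = 0; j < context_length; ++j) {
      for (int r = starts[i]; r < starts[i + 1]; ++r) {
        int src = r + context_start + j;
        if (src < starts[i] || src >= starts[i + 1]) continue;  // padding slot
        for (int d = 0; d < input_dim; ++d) {
          out[r * input_dim * context_length + j * input_dim + d] +=
              in[src * input_dim + d];
        }
      }
    }
  }
}

int main() {
  const int input_dim = 2, context_start = -1, context_length = 3;
  std::vector<int> starts = {0, 3, 5};  // two sequences: rows 0-2 and rows 3-4
  std::vector<float> in = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5};  // 5 rows x 2 cols
  std::vector<float> out(5 * input_dim * context_length, 0.f);
  contextProjectionForwardRef(in, out, starts, input_dim, context_start,
                              context_length);
  // Row 1 sees rows 0, 1, 2 of its sequence => 1 1 2 2 3 3
  for (int d = 0; d < input_dim * context_length; ++d)
    std::cout << out[1 * input_dim * context_length + d] << " ";
  std::cout << "\n";
  return 0;
}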
-void CpuMatrix::contextProjectionBackward(Matrix* inputGrad,
-                                          Matrix* weightGrad,
-                                          const IVector& sequence,
-                                          int contextLength,
-                                          int contextStart,
-                                          size_t beginPad,
-                                          bool isPadding) {
-  if (inputGrad) CHECK(dynamic_cast<CpuMatrix*>(inputGrad));
-  if (weightGrad) CHECK(dynamic_cast<CpuMatrix*>(weightGrad));
-  CHECK(dynamic_cast<const CpuIVector*>(&sequence));
-  int64_t inputDim = inputGrad ? inputGrad->getWidth()
-                               : weightGrad ? weightGrad->getWidth() : 0;
-  CHECK_EQ(getWidth(), inputDim * contextLength);
-
-  const int* starts = sequence.getData();
-  size_t numSequences = sequence.getSize() - 1;
-  for (size_t i = 0; i < numSequences; ++i) {
-    for (int j = 0; j < contextLength; ++j) {
-      int begin = starts[i] + contextStart + j;
-      int end = starts[i + 1] + contextStart + j;
-      int dstBegin = starts[i];
-      int dstEnd = starts[i + 1];
-      if (begin < starts[i]) {
-        int64_t padSize =
-            std::min(starts[i] - begin, starts[i + 1] - starts[i]);
-        if (isPadding && weightGrad) {
-          MatrixPtr mat = this->subMatrix(starts[i], padSize);
-          MatrixPtr sub = weightGrad->subMatrix(j, padSize);
-          sub->addAtOffset(*mat, j * inputDim);
-        }
-        dstBegin = starts[i] + padSize;
-        begin = starts[i];
-      }
-      if (end > starts[i + 1]) {
-        int64_t padSize =
-            std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
-        if (isPadding && weightGrad) {
-          MatrixPtr mat = this->subMatrix(starts[i + 1] - padSize, padSize);
-          MatrixPtr sub = weightGrad->subMatrix(
-              beginPad + contextStart + j - padSize, padSize);
-          sub->addAtOffset(*mat, j * inputDim);
-        }
-        dstEnd = starts[i + 1] - padSize;
-        end = starts[i + 1];
-      }
-      if (end <= begin) continue;
-      if (!inputGrad) continue;
-      MatrixPtr src = inputGrad->subMatrix(begin, end - begin);
-      MatrixPtr dst = this->subMatrix(dstBegin, dstEnd - dstBegin);
-      src->addAtOffset(*dst, j * inputDim);
-    }
-  }
-}
-
 inline void vecAddTo(real* a, const real* b, size_t len) {
   for (unsigned int i = 0; i < len; ++i) {
     a[i] += b[i];
...
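For the matching backward pass, the gradient that landed in column block j of an output row is scattered back to the input row it was copied from. The sketch below restates only the input-gradient part of the removed CpuMatrix::contextProjectionBackward, under the same flat-array assumptions as the forward sketch; the weight-gradient branch is omitted, and contextProjectionBackwardDataRef is a hypothetical name for this illustration.

// Flat-array sketch of the backward-data pass: column block j of out_grad row r
// is accumulated into in_grad row r + context_start + j when that row belongs
// to the same sequence.
#include <vector>

void contextProjectionBackwardDataRef(const std::vector<float>& out_grad,
                                      std::vector<float>& in_grad,
                                      const std::vector<int>& starts,
                                      int input_dim,
                                      int context_start,
                                      int context_length) {
  for (size_t i = 0; i + 1 < starts.size(); ++i) {
    for (int j = 0; j < context_length; ++j) {
      for (int r = starts[i]; r < starts[i + 1]; ++r) {
        int src = r + context_start + j;  // row that fed this context slot
        if (src < starts[i] || src >= starts[i + 1]) continue;
        for (int d = 0; d < input_dim; ++d) {
          in_grad[src * input_dim + d] +=
              out_grad[r * input_dim * context_length + j * input_dim + d];
        }
      }
    }
  }
}

int main() {
  std::vector<int> starts = {0, 3, 5};
  std::vector<float> out_grad(5 * 2 * 3, 1.f);  // same shapes as the forward sketch
  std::vector<float> in_grad(5 * 2, 0.f);
  contextProjectionBackwardDataRef(out_grad, in_grad, starts, /*input_dim=*/2,
                                   /*context_start=*/-1, /*context_length=*/3);
  return 0;
}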
paddle/math/Matrix.h  View file @ be3e2764
...
@@ -972,42 +972,6 @@ public:
     LOG(FATAL) << "Not implemeted";
   }
 
-  virtual void contextProjectionForward(Matrix& input,
-                                        Matrix* weight,
-                                        const IVector& sequence,
-                                        int contextLength,
-                                        int contextStart,
-                                        size_t beginPad,
-                                        bool isPadding) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
-  virtual void contextProjectionBackward(Matrix* inputGrad,
-                                         Matrix* weightGrad,
-                                         const IVector& sequence,
-                                         int contextLength,
-                                         int contextStart,
-                                         size_t beginPad,
-                                         bool isPadding) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
-  virtual void contextProjectionBackwardData(Matrix& inputGrad,
-                                             const IVector& sequence,
-                                             int contextLength,
-                                             int contextStart) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
-  virtual void contextProjectionBackwardWeight(Matrix& weightGrad,
-                                               const IVector& sequence,
-                                               int contextLength,
-                                               int contextStart,
-                                               int totalPad,
-                                               size_t beginPad) {
-    LOG(FATAL) << "Not implemeted";
-  }
-
   /**
    * @code
    * this.row[i] += table.row[ids[i]]
...
@@ -1442,26 +1406,6 @@ public:
                           const IVector& sequence,
                           IVector& index);
 
-  void contextProjectionForward(Matrix& input,
-                                Matrix* weight,
-                                const IVector& sequence,
-                                int contextLength,
-                                int contextStart,
-                                size_t beginPad,
-                                bool isPadding);
-
-  void contextProjectionBackwardData(Matrix& inputGrad,
-                                     const IVector& sequence,
-                                     int contextLength,
-                                     int contextStart);
-
-  void contextProjectionBackwardWeight(Matrix& weightGrad,
-                                       const IVector& sequence,
-                                       int contextLength,
-                                       int contextStart,
-                                       int totalPad,
-                                       size_t beginPad);
-
   void bilinearForward(const Matrix& in,
                        const size_t inImgH,
                        const size_t inImgW,
...
@@ -1648,22 +1592,6 @@ public:
                           const IVector& sequence,
                           IVector& index);
 
-  void contextProjectionForward(Matrix& input,
-                                Matrix* weight,
-                                const IVector& sequence,
-                                int contextLength,
-                                int contextStart,
-                                size_t beginPad,
-                                bool isPadding);
-
-  void contextProjectionBackward(Matrix* inputGrad,
-                                 Matrix* weightGrad,
-                                 const IVector& sequence,
-                                 int contextLength,
-                                 int contextStart,
-                                 size_t beginPad,
-                                 bool isPadding);
-
   real* getRow(size_t row) { return BaseMatrix::rowBuf(row); }
   virtual real* getRowBuf(size_t row) { return getRow(row); }
...
paddle/math/tests/test_matrixCompare.cpp  View file @ be3e2764
...
@@ -29,148 +29,6 @@ using namespace std;  // NOLINT
 using autotest::TensorCheckEqual;
 using autotest::TensorCheckErr;
 
-void testMatrixProjectionForward(int contextStart,
-                                 int contextLength,
-                                 bool padding,
-                                 int batchSize,
-                                 int inputDim) {
-  MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
-  MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
-  cpuInput->randomizeUniform();
-  gpuInput->copyFrom(*cpuInput);
-
-  int pad = std::max(0, -contextStart) +
-            std::max(0, contextStart + contextLength - 1);
-  if (pad == 0) padding = false;
-  MatrixPtr cpuWeight = nullptr;
-  MatrixPtr gpuWeight = nullptr;
-  if (padding) {
-    cpuWeight = std::make_shared<CpuMatrix>(pad, inputDim);
-    gpuWeight = std::make_shared<GpuMatrix>(pad, inputDim);
-    cpuWeight->randomizeUniform();
-    gpuWeight->copyFrom(*cpuWeight);
-  }
-
-  IVectorPtr cpuSequence;
-  generateSequenceStartPositions(batchSize, cpuSequence);
-  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
-  gpuSequence->copyFrom(*cpuSequence);
-
-  MatrixPtr cpuOutput =
-      std::make_shared<CpuMatrix>(batchSize, inputDim * contextLength);
-  MatrixPtr gpuOutput =
-      std::make_shared<GpuMatrix>(batchSize, inputDim * contextLength);
-  cpuOutput->randomizeUniform();
-  gpuOutput->copyFrom(*cpuOutput);
-
-  // calculate
-  int beginPad = std::max(0, -contextStart);
-  cpuOutput->contextProjectionForward(*cpuInput,
-                                      cpuWeight.get(),
-                                      *cpuSequence,
-                                      contextLength,
-                                      contextStart,
-                                      beginPad,
-                                      padding);
-  gpuOutput->contextProjectionForward(*gpuInput,
-                                      gpuWeight.get(),
-                                      *gpuSequence,
-                                      contextLength,
-                                      contextStart,
-                                      beginPad,
-                                      padding);
-
-  TensorCheckEqual(*cpuOutput, *gpuOutput);
-}
-
-void testMatrixProjectionBackward(int contextStart,
-                                  int contextLength,
-                                  bool padding,
-                                  int batchSize,
-                                  int inputDim) {
-  MatrixPtr cpuOutputGrad =
-      std::make_shared<CpuMatrix>(batchSize, inputDim * contextLength);
-  MatrixPtr gpuOutputGrad =
-      std::make_shared<GpuMatrix>(batchSize, inputDim * contextLength);
-  cpuOutputGrad->randomizeUniform();
-  gpuOutputGrad->copyFrom(*cpuOutputGrad);
-
-  IVectorPtr cpuSequence;
-  generateSequenceStartPositions(batchSize, cpuSequence);
-  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
-  gpuSequence->copyFrom(*cpuSequence);
-
-  MatrixPtr cpuInputGrad = std::make_shared<CpuMatrix>(batchSize, inputDim);
-  MatrixPtr gpuInputGrad = std::make_shared<GpuMatrix>(batchSize, inputDim);
-  cpuInputGrad->randomizeUniform();
-  gpuInputGrad->copyFrom(*cpuInputGrad);
-
-  int pad = std::max(0, -contextStart) +
-            std::max(0, contextStart + contextLength - 1);
-  if (pad == 0) padding = false;
-  MatrixPtr cpuWeightGrad = nullptr;
-  MatrixPtr gpuWeightGrad = nullptr;
-  if (padding) {
-    cpuWeightGrad = std::make_shared<CpuMatrix>(pad, inputDim);
-    gpuWeightGrad = std::make_shared<GpuMatrix>(pad, inputDim);
-    cpuWeightGrad->randomizeUniform();
-    gpuWeightGrad->copyFrom(*cpuWeightGrad);
-  }
-
-  // calculate
-  int beginPad = std::max(0, -contextStart);
-  cpuOutputGrad->contextProjectionBackward(cpuInputGrad.get(),
-                                           cpuWeightGrad.get(),
-                                           *cpuSequence,
-                                           contextLength,
-                                           contextStart,
-                                           beginPad,
-                                           padding);
-  gpuOutputGrad->contextProjectionBackwardData(
-      *gpuInputGrad, *gpuSequence, contextLength, contextStart);
-  if (padding) {
-    gpuOutputGrad->contextProjectionBackwardWeight(*gpuWeightGrad,
-                                                   *gpuSequence,
-                                                   contextLength,
-                                                   contextStart,
-                                                   pad,
-                                                   beginPad);
-  }
-
-  TensorCheckErr(*cpuInputGrad, *gpuInputGrad);
-  if (padding) {
-    TensorCheckErr(*cpuWeightGrad, *gpuWeightGrad);
-  }
-}
-
-TEST(Matrix, projection) {
-  for (auto contextStart : {-5, -3, -1, 0, 3}) {
-    for (auto contextLength : {1, 2, 5, 7}) {
-      for (auto trainablePadding : {false, true}) {
-        for (auto batchSize : {1, 2, 5, 20, 100}) {
-          for (auto inputDim : {15, 32, 63, 128, 200}) {
-            VLOG(3) << " contextStart=" << contextStart
-                    << " contextLength=" << contextLength
-                    << " trainablePadding=" << trainablePadding
-                    << " batchSize=" << batchSize
-                    << " inputDim=" << inputDim;
-            testMatrixProjectionForward(contextStart,
-                                        contextLength,
-                                        trainablePadding,
-                                        batchSize,
-                                        inputDim);
-            testMatrixProjectionBackward(contextStart,
-                                         contextLength,
-                                         trainablePadding,
-                                         batchSize,
-                                         inputDim);
-          }
-        }
-      }
-    }
-  }
-}
-
 void testMatrixMaxSequence(int batchSize, int inputDim) {
   // forward
   MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
...
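The removed tests size the padding weight from two quantities that also appear in the layer configuration: max(0, -contextStart) rows for context slots that fall before the start of a sequence (beginPad) and max(0, contextStart + contextLength - 1) rows for slots past its end. A small worked example of that arithmetic, matching how cpuWeight and cpuWeightGrad are allocated as (pad, inputDim) above; the endPad name is introduced here only for readability.

// Padding-size arithmetic used by the removed tests.
#include <algorithm>
#include <cassert>

int main() {
  int contextStart = -2, contextLength = 5;
  int beginPad = std::max(0, -contextStart);                   // 2 front slots
  int endPad = std::max(0, contextStart + contextLength - 1);  // 2 back slots
  int pad = beginPad + endPad;                                 // weight has 4 rows
  assert(beginPad == 2 && endPad == 2 && pad == 4);
  return 0;
}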