Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
23ac0b78
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
23ac0b78
编写于
1月 10, 2017
作者:
X
xutianbing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
merge Daoyuan's FuncArgs, pass the ContextProjection test.
上级
1482ec43
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
101 addition
and
247 deletion
+101
-247
paddle/function/CMakeLists.txt
paddle/function/CMakeLists.txt
+1
-1
paddle/function/ContextProjectionOp.cpp
paddle/function/ContextProjectionOp.cpp
+35
-146
paddle/function/ContextProjectionOp.h
paddle/function/ContextProjectionOp.h
+1
-1
paddle/function/ContextProjectionOpGpu.cu
paddle/function/ContextProjectionOpGpu.cu
+0
-1
paddle/function/ContextProjectionOpTest.cpp
paddle/function/ContextProjectionOpTest.cpp
+38
-37
paddle/function/FunctionTest.h
paddle/function/FunctionTest.h
+17
-55
paddle/gserver/layers/ContextProjection.cpp
paddle/gserver/layers/ContextProjection.cpp
+9
-6
未找到文件。
paddle/function/CMakeLists.txt
浏览文件 @
23ac0b78
...
...
@@ -24,7 +24,7 @@ if(WITH_TESTING)
add_simple_unittest
(
TensorTypeTest
)
add_simple_unittest
(
BufferArgTest
)
add_simple_unittest
(
FunctionTest
)
#
add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest
(
ContextProjectionOpTest
)
endif
()
endif
()
...
...
paddle/function/ContextProjectionOp.cpp
浏览文件 @
23ac0b78
...
...
@@ -125,11 +125,11 @@ public:
CHECK_EQ
(
outputs
[
0
].
getArgType
(),
ADD_TO
);
auto
out_mat
=
outputs
[
0
].
matrix
<
Device
>
();
auto
in_mat
=
inputs
[
0
].
matrix
<
Device
>
();
auto
w_mat
=
!
inputs
[
1
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
const
auto
in_mat
=
inputs
[
0
].
matrix
<
Device
>
();
const
auto
w_mat
=
!
inputs
[
1
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
inputs
[
1
].
matrix
<
Device
>
();
auto
seq_vec
=
inputs
[
2
].
vector
<
int
,
Device
>
();
const
auto
seq_vec
=
inputs
[
2
].
vector
<
int
,
Device
>
();
ContextProjectionForward
<
Device
>
(
out_mat
,
in_mat
,
w_mat
,
...
...
@@ -150,7 +150,6 @@ private:
*
*/
template
<
>
<<<<<<<
HEAD
void
ContextProjectionBackward
<
DEVICE_TYPE_CPU
>
(
const
CpuMatrix
&
out_grad_mat
,
CpuMatrix
&
in_grad_mat
,
CpuMatrix
&
w_grad_mat
,
...
...
@@ -174,7 +173,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
int64_t
pad_size
=
std
::
min
(
starts
[
i
]
-
begin
,
starts
[
i
+
1
]
-
starts
[
i
]);
if
(
is_padding
&&
w_grad_mat
)
{
MatrixPtr
mat
=
out_grad_mat
.
subMatrix
(
starts
[
i
],
pad_size
);
MatrixPtr
mat
=
const_cast
<
CpuMatrix
&>
(
out_grad_mat
)
.
subMatrix
(
starts
[
i
],
pad_size
);
MatrixPtr
sub
=
w_grad_mat
.
subMatrix
(
j
,
pad_size
);
sub
->
addAtOffset
(
*
mat
,
j
*
input_dim
);
}
...
...
@@ -185,8 +185,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
int64_t
pad_size
=
std
::
min
(
end
-
starts
[
i
+
1
],
starts
[
i
+
1
]
-
starts
[
i
]);
if
(
is_padding
&&
w_grad_mat
)
{
MatrixPtr
mat
=
out_grad_mat
.
subMatrix
(
starts
[
i
+
1
]
-
pad_size
,
pad_size
);
MatrixPtr
mat
=
const_cast
<
CpuMatrix
&>
(
out_grad_mat
)
.
subMatrix
(
starts
[
i
+
1
]
-
pad_size
,
pad_size
);
MatrixPtr
sub
=
w_grad_mat
.
subMatrix
(
begin_pad
+
context_start
+
j
-
pad_size
,
pad_size
);
sub
->
addAtOffset
(
*
mat
,
j
*
input_dim
);
...
...
@@ -197,7 +197,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
if
(
end
<=
begin
)
continue
;
if
(
!
in_grad_mat
)
continue
;
MatrixPtr
src
=
in_grad_mat
.
subMatrix
(
begin
,
end
-
begin
);
MatrixPtr
dst
=
out_grad_mat
.
subMatrix
(
dst_begin
,
dst_end
-
dst_begin
);
MatrixPtr
dst
=
const_cast
<
CpuMatrix
&>
(
out_grad_mat
)
.
subMatrix
(
dst_begin
,
dst_end
-
dst_begin
);
src
->
addAtOffset
(
*
dst
,
j
*
input_dim
);
}
}
...
...
@@ -208,9 +209,9 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
* Update the weight gradient and input layer gradient with backprop
*
* \param inputs[0] input sequence.
* \param inputs[1]
output
grad.
* \param
inouts[0] input
grad.
* \param
ino
uts[1] weight grad.
* \param inputs[1]
output layer
grad.
* \param
outputs[0] input layer
grad.
* \param
outp
uts[1] weight grad.
*/
template
<
DeviceType
Device
>
class
ContextProjectionBackwardFunc
:
public
FunctionBase
{
...
...
@@ -224,32 +225,34 @@ public:
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK_EQ
((
size_t
)
3
,
inputs
.
size
());
CHECK_EQ
((
size_t
)
1
,
outputs
.
size
());
CHECK_EQ
((
size_t
)
2
,
inputs
.
size
());
CHECK_EQ
((
size_t
)
2
,
outputs
.
size
());
CHECK
(
outputs
[
0
].
data
()
&&
inputs
[
2
].
data
());
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK
(
inputs
[
0
].
data
()
&&
inputs
[
1
].
data
());
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
1
);
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
2
].
shape
().
ndims
(),
(
size_t
)
1
);
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
/// dim of input == dim of weight
CHECK_EQ
(
inputs
[
0
].
shape
()[
1
],
in
puts
[
1
].
shape
()[
1
]);
/// input and output has the same batch_size
CHECK_EQ
(
inputs
[
0
].
shape
()[
0
],
outputs
[
0
].
shape
()[
0
]);
/// dim of output
= dim of input
* context_length
CHECK_EQ
(
outputs
[
0
].
shape
()[
1
],
in
puts
[
0
].
shape
()[
1
]
*
context_length_
);
/// dim of input
grad
== dim of weight
CHECK_EQ
(
outputs
[
0
].
shape
()[
1
],
out
puts
[
1
].
shape
()[
1
]);
/// input and output
grad
has the same batch_size
CHECK_EQ
(
outputs
[
0
].
shape
()[
0
],
inputs
[
1
].
shape
()[
0
]);
/// dim of output
val = dim of input grad
* context_length
CHECK_EQ
(
inputs
[
1
].
shape
()[
1
],
out
puts
[
0
].
shape
()[
1
]
*
context_length_
);
CHECK_EQ
(
outputs
[
0
].
getArgType
(),
ADD_TO
);
CHECK_EQ
(
outputs
[
1
].
getArgType
(),
ADD_TO
);
auto
out_grad_mat
=
outputs
[
0
].
matrix
<
Device
>
();
const
auto
seq_vec
=
inputs
[
0
].
vector
<
int
,
Device
>
();
const
auto
out_grad_mat
=
inputs
[
1
].
matrix
<
Device
>
();
auto
in_grad_mat
=
!
inputs
[
0
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
inputs
[
0
].
matrix
<
Device
>
();
auto
w_grad_mat
=
!
inputs
[
1
].
data
()
!
outputs
[
0
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
inputs
[
1
].
matrix
<
Device
>
();
auto
seq_vec
=
inputs
[
2
].
vector
<
int
,
Device
>
();
:
outputs
[
0
].
matrix
<
Device
>
();
auto
w_grad_mat
=
!
outputs
[
1
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
outputs
[
1
].
matrix
<
Device
>
();
ContextProjectionBackward
<
Device
>
(
out_grad_mat
,
in_grad_mat
,
w_grad_mat
,
...
...
@@ -269,112 +272,6 @@ private:
size_t
total_pad_
;
};
#if 0
/**
* Context Projection Backward Data Function.
* Update gradient of the input layer with backprop.
*
* \param inouts[0] input grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
context_length_ = config.get<size_t>("context_length");
context_start_ = config.get<int>("context_start");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
/// input and output grad have the same batch_size
CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
typename SequenceT<Device>::type seq_vec(
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardData<Device>(out_grad_mat.get(),
in_grad_mat.get(),
seq_vec,
context_length_,
context_start_);
}
private:
size_t context_length_;
int context_start_;
};
/**
* Context Projection Backward Weight Function.
* Update weight gradient with backprop.
*
* \param inouts[0] weight grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
context_length_ = config.get<size_t>("context_length");
context_start_ = config.get<int>("context_start");
begin_pad_ = config.get<size_t>("begin_pad");
total_pad_ = config.get<size_t>("total_pad");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
typename SequenceT<Device>::type seq_vec(
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
w_grad_mat.get(),
seq_vec,
context_length_,
context_start_,
total_pad_,
begin_pad_);
}
private:
size_t context_length_;
int context_start_;
size_t begin_pad_;
size_t total_pad_;
};
#endif
REGISTER_TYPED_FUNC
(
ContextProjectionForward
,
CPU
,
ContextProjectionForwardFunc
);
...
...
@@ -388,13 +285,5 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC
(
ContextProjectionBackward
,
GPU
,
ContextProjectionBackwardFunc
);
#if 0
REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
GPU,
ContextProjectionBackwardDataFunc);
REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
GPU,
ContextProjectionBackwardWeightFunc);
#endif
#endif
}
// namespace paddle
paddle/function/ContextProjectionOp.h
浏览文件 @
23ac0b78
...
...
@@ -56,7 +56,7 @@ void ContextProjectionForward(
*/
template
<
DeviceType
DType
>
void
ContextProjectionBackward
(
typename
Tensor
<
real
,
DType
>::
Matrix
&
out_grad
,
const
typename
Tensor
<
real
,
DType
>::
Matrix
&
out_grad
,
typename
Tensor
<
real
,
DType
>::
Matrix
&
in_grad
,
typename
Tensor
<
real
,
DType
>::
Matrix
&
w_grad
,
const
typename
Tensor
<
int
,
DType
>::
Vector
&
seq_vec
,
...
...
paddle/function/ContextProjectionOpGpu.cu
浏览文件 @
23ac0b78
...
...
@@ -217,7 +217,6 @@ void hl_context_projection_backward_data(const real* out_grad,
}
template
<
>
<<<<<<<
HEAD
void
ContextProjectionBackwardData
<
DEVICE_TYPE_GPU
>
(
const
GpuMatrix
&
out_grad
,
GpuMatrix
&
in_grad
,
const
GpuIVector
&
sequence
,
...
...
paddle/function/ContextProjectionOpTest.cpp
浏览文件 @
23ac0b78
...
...
@@ -56,24 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out
.
randomizeUniform
();
gpu_out
.
copyFrom
(
cpu_out
);
compare
.
getCpuFunction
()
->
calc
(
{
Tensor
(
cpu_in
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
cpu_weight
?
cpu_weight
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
}),
Tensor
(
reinterpret_cast
<
real
*>
(
cpu_seq
->
getData
()),
Dims
{
cpu_seq
->
getSize
()})},
{},
{
Tensor
(
cpu_out
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})});
compare
.
getGpuFunction
()
->
calc
(
{
Tensor
(
gpu_in
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
gpu_weight
?
gpu_weight
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
}),
Tensor
(
reinterpret_cast
<
real
*>
(
gpu_seq
->
getData
()),
Dims
{
gpu_seq
->
getSize
()})},
{},
{
Tensor
(
gpu_out
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})});
BufferArgs
cpu_inputs
;
BufferArgs
cpu_outputs
;
cpu_inputs
.
addArg
(
cpu_in
);
cpu_inputs
.
addArg
(
cpu_weight
?
*
cpu_weight
:
CpuMatrix
(
nullptr
,
0
,
input_dim
));
cpu_inputs
.
addArg
(
*
cpu_seq
);
cpu_outputs
.
addArg
(
cpu_out
,
ADD_TO
);
compare
.
getCpuFunction
()
->
calc
(
cpu_inputs
,
cpu_outputs
);
BufferArgs
gpu_inputs
;
BufferArgs
gpu_outputs
;
gpu_inputs
.
addArg
(
gpu_in
);
gpu_inputs
.
addArg
(
gpu_weight
?
*
gpu_weight
:
GpuMatrix
(
nullptr
,
0
,
input_dim
));
gpu_inputs
.
addArg
(
*
gpu_seq
);
gpu_outputs
.
addArg
(
gpu_out
,
ADD_TO
);
compare
.
getGpuFunction
()
->
calc
(
gpu_inputs
,
gpu_outputs
);
autotest
::
TensorCheckEqual
(
cpu_out
,
gpu_out
);
}
...
...
@@ -119,25 +120,25 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad
->
copyFrom
(
*
cpu_w_grad
);
}
compare
.
getCpuFunction
()
->
calc
(
{
Tensor
(
reinterpret_cast
<
real
*>
(
cpu_seq
->
getData
()),
Dims
{
cpu_seq
->
getSize
()}),
Tensor
(
cpu_out_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})},
{},
{
Tensor
(
cpu_in_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
cpu_w_grad
?
cpu_w_grad
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
})}
);
compare
.
getGpuFunction
()
->
calc
(
{
Tensor
(
reinterpret_cast
<
real
*>
(
gpu_seq
->
getData
()),
Dims
{
gpu_seq
->
getSize
()}),
Tensor
(
gpu_out_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})},
{},
{
Tensor
(
gpu_in_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
gpu_w_grad
?
gpu_w_grad
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
})}
);
BufferArgs
cpu_inputs
;
BufferArgs
cpu_outputs
;
cpu_inputs
.
addArg
(
*
cpu_seq
);
cpu_inputs
.
addArg
(
cpu_out_grad
);
cpu_outputs
.
addArg
(
cpu_in_grad
,
ADD_TO
);
cpu_outputs
.
addArg
(
cpu_w_grad
?
*
cpu_w_grad
:
CpuMatrix
(
nullptr
,
0
,
input_dim
),
ADD_TO
);
compare
.
getCpuFunction
()
->
calc
(
cpu_inputs
,
cpu_outputs
);
BufferArgs
gpu_inputs
;
BufferArgs
gpu_outputs
;
gpu_inputs
.
addArg
(
*
gpu_seq
);
gpu_inputs
.
addArg
(
gpu_out_grad
);
gpu_outputs
.
addArg
(
gpu_in_grad
,
ADD_TO
);
gpu_outputs
.
addArg
(
gpu_w_grad
?
*
gpu_w_grad
:
GpuMatrix
(
nullptr
,
0
,
input_dim
),
ADD_TO
);
compare
.
getGpuFunction
()
->
calc
(
gpu_inputs
,
gpu_outputs
);
autotest
::
TensorCheckErr
(
cpu_in_grad
,
gpu_in_grad
);
if
(
is_padding
)
{
...
...
paddle/function/FunctionTest.h
浏览文件 @
23ac0b78
...
...
@@ -27,66 +27,28 @@ public:
gpu
->
init
(
config
);
}
void
cmpWithArg
(
const
Argument
s
&
inputs
,
const
Argument
s
&
outputs
,
const
Argument
s
&
inouts
)
{
void
cmpWithArg
(
const
BufferArg
s
&
inputs
,
const
BufferArg
s
&
outputs
,
const
BufferArg
s
&
inouts
)
{
// init cpu and gpu arguments
auto
initArgs
=
[
=
](
Arguments
&
cpuArgs
,
Arguments
&
gpuArgs
,
const
Arguments
&
inArgs
)
{
for
(
const
auto
arg
:
inArgs
)
{
size_t
size
=
sizeof
(
real
);
for
(
const
auto
dim
:
arg
.
dims_
)
{
size
*=
dim
;
}
if
(
arg
.
getData
())
{
// todo(tianbing), waste unnecessary mem here
cpuMemory
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuArgs
.
emplace_back
(
Tensor
((
real
*
)
arg
.
getData
(),
arg
.
dims_
));
gpuArgs
.
emplace_back
(
Tensor
((
real
*
)
arg
.
getData
(),
arg
.
dims_
));
// already init outside
}
else
{
cpuMemory
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuArgs
.
emplace_back
(
Tensor
((
real
*
)
cpuMemory
.
back
()
->
getBuf
(),
arg
.
dims_
));
gpuArgs
.
emplace_back
(
Tensor
((
real
*
)
gpuMemory
.
back
()
->
getBuf
(),
arg
.
dims_
));
// will use an api to refactor this code.
CpuVector
cpuVector
(
size
/
sizeof
(
real
),
(
real
*
)
cpuArgs
.
back
().
getData
());
GpuVector
gpuVector
(
size
/
sizeof
(
real
),
(
real
*
)
gpuArgs
.
back
().
getData
());
cpuVector
.
uniform
(
0.001
,
1
);
gpuVector
.
copyFrom
(
cpuVector
);
}
}
BufferArgs
&
cpuArgs
,
BufferArgs
&
gpuArgs
,
const
BufferArgs
&
inArgs
)
{
/// leave it empty to pass the compile of ContextProjectionTest
/// Daoyuan is working on FunctionTest
/// and I will further merge with it
};
initArgs
(
cpuInputs
,
gpuInputs
,
inputs
);
initArgs
(
cpuOutputs
,
gpuOutputs
,
outputs
);
initArgs
(
cpuInouts
,
gpuInouts
,
inouts
);
// function calculate
cpu
->
calc
(
cpuInputs
,
cpuOutputs
,
cpuInouts
);
gpu
->
calc
(
gpuInputs
,
gpuOutputs
,
gpuInouts
);
cpu
->
calc
(
cpuInputs
,
cpuOutputs
);
gpu
->
calc
(
gpuInputs
,
gpuOutputs
);
// check outputs and inouts
auto
checkArgs
=
[
=
](
const
Arguments
&
cpuArgs
,
const
Arguments
&
gpuArgs
)
{
for
(
size_t
i
=
0
;
i
<
cpuArgs
.
size
();
i
++
)
{
auto
cpu
=
cpuArgs
[
i
];
auto
gpu
=
gpuArgs
[
i
];
size_t
size
=
1
;
for
(
auto
dim
:
cpu
.
dims_
)
{
size
*=
dim
;
}
CpuVector
cpuVector
(
size
,
(
real
*
)
cpu
.
getData
());
GpuVector
gpuVector
(
size
,
(
real
*
)
gpu
.
getData
());
autotest
::
TensorCheckErr
(
cpuVector
,
gpuVector
);
}
auto
checkArgs
=
[
=
](
const
BufferArgs
&
cpuArgs
,
const
BufferArgs
&
gpuArgs
)
{
/// leave it open
};
checkArgs
(
cpuOutputs
,
gpuOutputs
);
checkArgs
(
cpuInouts
,
gpuInouts
);
}
std
::
shared_ptr
<
FunctionBase
>
getCpuFunction
()
const
{
return
cpu
;
}
...
...
@@ -98,12 +60,12 @@ protected:
std
::
shared_ptr
<
FunctionBase
>
gpu
;
std
::
vector
<
CpuMemHandlePtr
>
cpuMemory
;
std
::
vector
<
GpuMemHandlePtr
>
gpuMemory
;
Argument
s
cpuInputs
;
Argument
s
cpuOutputs
;
Argument
s
cpuInouts
;
Argument
s
gpuInputs
;
Argument
s
gpuOutputs
;
Argument
s
gpuInouts
;
BufferArg
s
cpuInputs
;
BufferArg
s
cpuOutputs
;
BufferArg
s
cpuInouts
;
BufferArg
s
gpuInputs
;
BufferArg
s
gpuOutputs
;
BufferArg
s
gpuInouts
;
};
}
// namespace paddle
paddle/gserver/layers/ContextProjection.cpp
浏览文件 @
23ac0b78
...
...
@@ -166,13 +166,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
BufferArgs
inputs
;
BufferArgs
outputs
;
inputs
.
addArg
(
CpuMatrix
(
in_
->
grad
?
in_
->
grad
->
getData
()
:
nullptr
,
batch_size
,
input_dim
));
inputs
.
addArg
(
CpuMatrix
(
w_ptr
?
w_ptr
->
getData
()
:
nullptr
,
w_ptr
?
w_ptr
->
getHeight
()
:
0
,
input_dim
));
inputs
.
addArg
(
*
in_
->
sequenceStartPositions
->
getVector
(
useGpu_
));
outputs
.
addArg
(
*
out_
->
grad
,
ADD_TO
);
inputs
.
addArg
(
*
out_
->
grad
);
outputs
.
addArg
(
CpuMatrix
(
in_
->
grad
?
in_
->
grad
->
getData
()
:
nullptr
,
batch_size
,
input_dim
),
ADD_TO
);
outputs
.
addArg
(
CpuMatrix
(
w_ptr
?
w_ptr
->
getData
()
:
nullptr
,
w_ptr
?
w_ptr
->
getHeight
()
:
0
,
input_dim
),
ADD_TO
);
backward_
[
0
]
->
calc
(
inputs
,
outputs
);
if
(
config_
.
trainable_padding
())
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录