Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
23ac0b78
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
23ac0b78
编写于
1月 10, 2017
作者:
X
xutianbing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
merge Daoyuan's FuncArgs, pass the ContextProjection test.
上级
1482ec43
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
101 addition
and
247 deletion
+101
-247
paddle/function/CMakeLists.txt
paddle/function/CMakeLists.txt
+1
-1
paddle/function/ContextProjectionOp.cpp
paddle/function/ContextProjectionOp.cpp
+35
-146
paddle/function/ContextProjectionOp.h
paddle/function/ContextProjectionOp.h
+1
-1
paddle/function/ContextProjectionOpGpu.cu
paddle/function/ContextProjectionOpGpu.cu
+0
-1
paddle/function/ContextProjectionOpTest.cpp
paddle/function/ContextProjectionOpTest.cpp
+38
-37
paddle/function/FunctionTest.h
paddle/function/FunctionTest.h
+17
-55
paddle/gserver/layers/ContextProjection.cpp
paddle/gserver/layers/ContextProjection.cpp
+9
-6
未找到文件。
paddle/function/CMakeLists.txt
浏览文件 @
23ac0b78
...
...
@@ -24,7 +24,7 @@ if(WITH_TESTING)
add_simple_unittest
(
TensorTypeTest
)
add_simple_unittest
(
BufferArgTest
)
add_simple_unittest
(
FunctionTest
)
#
add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest
(
ContextProjectionOpTest
)
endif
()
endif
()
...
...
paddle/function/ContextProjectionOp.cpp
浏览文件 @
23ac0b78
...
...
@@ -125,11 +125,11 @@ public:
CHECK_EQ
(
outputs
[
0
].
getArgType
(),
ADD_TO
);
auto
out_mat
=
outputs
[
0
].
matrix
<
Device
>
();
auto
in_mat
=
inputs
[
0
].
matrix
<
Device
>
();
auto
w_mat
=
!
inputs
[
1
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
const
auto
in_mat
=
inputs
[
0
].
matrix
<
Device
>
();
const
auto
w_mat
=
!
inputs
[
1
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
inputs
[
1
].
matrix
<
Device
>
();
auto
seq_vec
=
inputs
[
2
].
vector
<
int
,
Device
>
();
const
auto
seq_vec
=
inputs
[
2
].
vector
<
int
,
Device
>
();
ContextProjectionForward
<
Device
>
(
out_mat
,
in_mat
,
w_mat
,
...
...
@@ -150,7 +150,6 @@ private:
*
*/
template
<
>
<<<<<<<
HEAD
void
ContextProjectionBackward
<
DEVICE_TYPE_CPU
>
(
const
CpuMatrix
&
out_grad_mat
,
CpuMatrix
&
in_grad_mat
,
CpuMatrix
&
w_grad_mat
,
...
...
@@ -174,7 +173,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
int64_t
pad_size
=
std
::
min
(
starts
[
i
]
-
begin
,
starts
[
i
+
1
]
-
starts
[
i
]);
if
(
is_padding
&&
w_grad_mat
)
{
MatrixPtr
mat
=
out_grad_mat
.
subMatrix
(
starts
[
i
],
pad_size
);
MatrixPtr
mat
=
const_cast
<
CpuMatrix
&>
(
out_grad_mat
)
.
subMatrix
(
starts
[
i
],
pad_size
);
MatrixPtr
sub
=
w_grad_mat
.
subMatrix
(
j
,
pad_size
);
sub
->
addAtOffset
(
*
mat
,
j
*
input_dim
);
}
...
...
@@ -185,8 +185,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
int64_t
pad_size
=
std
::
min
(
end
-
starts
[
i
+
1
],
starts
[
i
+
1
]
-
starts
[
i
]);
if
(
is_padding
&&
w_grad_mat
)
{
MatrixPtr
mat
=
out_grad_mat
.
subMatrix
(
starts
[
i
+
1
]
-
pad_size
,
pad_size
);
MatrixPtr
mat
=
const_cast
<
CpuMatrix
&>
(
out_grad_mat
)
.
subMatrix
(
starts
[
i
+
1
]
-
pad_size
,
pad_size
);
MatrixPtr
sub
=
w_grad_mat
.
subMatrix
(
begin_pad
+
context_start
+
j
-
pad_size
,
pad_size
);
sub
->
addAtOffset
(
*
mat
,
j
*
input_dim
);
...
...
@@ -197,7 +197,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
if
(
end
<=
begin
)
continue
;
if
(
!
in_grad_mat
)
continue
;
MatrixPtr
src
=
in_grad_mat
.
subMatrix
(
begin
,
end
-
begin
);
MatrixPtr
dst
=
out_grad_mat
.
subMatrix
(
dst_begin
,
dst_end
-
dst_begin
);
MatrixPtr
dst
=
const_cast
<
CpuMatrix
&>
(
out_grad_mat
)
.
subMatrix
(
dst_begin
,
dst_end
-
dst_begin
);
src
->
addAtOffset
(
*
dst
,
j
*
input_dim
);
}
}
...
...
@@ -208,9 +209,9 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
* Update the weight gradient and input layer gradient with backprop
*
* \param inputs[0] input sequence.
* \param inputs[1]
output
grad.
* \param
inouts[0] input
grad.
* \param
ino
uts[1] weight grad.
* \param inputs[1]
output layer
grad.
* \param
outputs[0] input layer
grad.
* \param
outp
uts[1] weight grad.
*/
template
<
DeviceType
Device
>
class
ContextProjectionBackwardFunc
:
public
FunctionBase
{
...
...
@@ -224,32 +225,34 @@ public:
}
void
calc
(
const
BufferArgs
&
inputs
,
const
BufferArgs
&
outputs
)
override
{
CHECK_EQ
((
size_t
)
3
,
inputs
.
size
());
CHECK_EQ
((
size_t
)
1
,
outputs
.
size
());
CHECK_EQ
((
size_t
)
2
,
inputs
.
size
());
CHECK_EQ
((
size_t
)
2
,
outputs
.
size
());
CHECK
(
outputs
[
0
].
data
()
&&
inputs
[
2
].
data
());
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK
(
inputs
[
0
].
data
()
&&
inputs
[
1
].
data
());
CHECK_EQ
(
inputs
[
0
].
shape
().
ndims
(),
(
size_t
)
1
);
CHECK_EQ
(
inputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
inputs
[
2
].
shape
().
ndims
(),
(
size_t
)
1
);
CHECK_EQ
(
outputs
[
0
].
shape
().
ndims
(),
(
size_t
)
2
);
CHECK_EQ
(
outputs
[
1
].
shape
().
ndims
(),
(
size_t
)
2
);
/// dim of input == dim of weight
CHECK_EQ
(
inputs
[
0
].
shape
()[
1
],
in
puts
[
1
].
shape
()[
1
]);
/// input and output has the same batch_size
CHECK_EQ
(
inputs
[
0
].
shape
()[
0
],
outputs
[
0
].
shape
()[
0
]);
/// dim of output
= dim of input
* context_length
CHECK_EQ
(
outputs
[
0
].
shape
()[
1
],
in
puts
[
0
].
shape
()[
1
]
*
context_length_
);
/// dim of input
grad
== dim of weight
CHECK_EQ
(
outputs
[
0
].
shape
()[
1
],
out
puts
[
1
].
shape
()[
1
]);
/// input and output
grad
has the same batch_size
CHECK_EQ
(
outputs
[
0
].
shape
()[
0
],
inputs
[
1
].
shape
()[
0
]);
/// dim of output
val = dim of input grad
* context_length
CHECK_EQ
(
inputs
[
1
].
shape
()[
1
],
out
puts
[
0
].
shape
()[
1
]
*
context_length_
);
CHECK_EQ
(
outputs
[
0
].
getArgType
(),
ADD_TO
);
CHECK_EQ
(
outputs
[
1
].
getArgType
(),
ADD_TO
);
auto
out_grad_mat
=
outputs
[
0
].
matrix
<
Device
>
();
const
auto
seq_vec
=
inputs
[
0
].
vector
<
int
,
Device
>
();
const
auto
out_grad_mat
=
inputs
[
1
].
matrix
<
Device
>
();
auto
in_grad_mat
=
!
inputs
[
0
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
inputs
[
0
].
matrix
<
Device
>
();
auto
w_grad_mat
=
!
inputs
[
1
].
data
()
!
outputs
[
0
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
inputs
[
1
].
matrix
<
Device
>
();
auto
seq_vec
=
inputs
[
2
].
vector
<
int
,
Device
>
();
:
outputs
[
0
].
matrix
<
Device
>
();
auto
w_grad_mat
=
!
outputs
[
1
].
data
()
?
typename
Tensor
<
real
,
Device
>::
Matrix
(
nullptr
,
0
,
0
)
:
outputs
[
1
].
matrix
<
Device
>
();
ContextProjectionBackward
<
Device
>
(
out_grad_mat
,
in_grad_mat
,
w_grad_mat
,
...
...
@@ -269,112 +272,6 @@ private:
size_t
total_pad_
;
};
#if 0
/**
* Context Projection Backward Data Function.
* Update gradient of the input layer with backprop.
*
* \param inouts[0] input grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
context_length_ = config.get<size_t>("context_length");
context_start_ = config.get<int>("context_start");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
/// input and output grad have the same batch_size
CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
typename SequenceT<Device>::type seq_vec(
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardData<Device>(out_grad_mat.get(),
in_grad_mat.get(),
seq_vec,
context_length_,
context_start_);
}
private:
size_t context_length_;
int context_start_;
};
/**
* Context Projection Backward Weight Function.
* Update weight gradient with backprop.
*
* \param inouts[0] weight grad.
* \param inputs[0] input sequence.
* \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
context_length_ = config.get<size_t>("context_length");
context_start_ = config.get<int>("context_start");
begin_pad_ = config.get<size_t>("begin_pad");
total_pad_ = config.get<size_t>("total_pad");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
typename SequenceT<Device>::type seq_vec(
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
w_grad_mat.get(),
seq_vec,
context_length_,
context_start_,
total_pad_,
begin_pad_);
}
private:
size_t context_length_;
int context_start_;
size_t begin_pad_;
size_t total_pad_;
};
#endif
REGISTER_TYPED_FUNC
(
ContextProjectionForward
,
CPU
,
ContextProjectionForwardFunc
);
...
...
@@ -388,13 +285,5 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC
(
ContextProjectionBackward
,
GPU
,
ContextProjectionBackwardFunc
);
#if 0
REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
GPU,
ContextProjectionBackwardDataFunc);
REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
GPU,
ContextProjectionBackwardWeightFunc);
#endif
#endif
}
// namespace paddle
paddle/function/ContextProjectionOp.h
浏览文件 @
23ac0b78
...
...
@@ -56,7 +56,7 @@ void ContextProjectionForward(
*/
template
<
DeviceType
DType
>
void
ContextProjectionBackward
(
typename
Tensor
<
real
,
DType
>::
Matrix
&
out_grad
,
const
typename
Tensor
<
real
,
DType
>::
Matrix
&
out_grad
,
typename
Tensor
<
real
,
DType
>::
Matrix
&
in_grad
,
typename
Tensor
<
real
,
DType
>::
Matrix
&
w_grad
,
const
typename
Tensor
<
int
,
DType
>::
Vector
&
seq_vec
,
...
...
paddle/function/ContextProjectionOpGpu.cu
浏览文件 @
23ac0b78
...
...
@@ -217,7 +217,6 @@ void hl_context_projection_backward_data(const real* out_grad,
}
template
<
>
<<<<<<<
HEAD
void
ContextProjectionBackwardData
<
DEVICE_TYPE_GPU
>
(
const
GpuMatrix
&
out_grad
,
GpuMatrix
&
in_grad
,
const
GpuIVector
&
sequence
,
...
...
paddle/function/ContextProjectionOpTest.cpp
浏览文件 @
23ac0b78
...
...
@@ -56,24 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out
.
randomizeUniform
();
gpu_out
.
copyFrom
(
cpu_out
);
compare
.
getCpuFunction
()
->
calc
(
{
Tensor
(
cpu_in
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
cpu_weight
?
cpu_weight
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
}),
Tensor
(
reinterpret_cast
<
real
*>
(
cpu_seq
->
getData
()),
Dims
{
cpu_seq
->
getSize
()})},
{},
{
Tensor
(
cpu_out
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})});
compare
.
getGpuFunction
()
->
calc
(
{
Tensor
(
gpu_in
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
gpu_weight
?
gpu_weight
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
}),
Tensor
(
reinterpret_cast
<
real
*>
(
gpu_seq
->
getData
()),
Dims
{
gpu_seq
->
getSize
()})},
{},
{
Tensor
(
gpu_out
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})});
BufferArgs
cpu_inputs
;
BufferArgs
cpu_outputs
;
cpu_inputs
.
addArg
(
cpu_in
);
cpu_inputs
.
addArg
(
cpu_weight
?
*
cpu_weight
:
CpuMatrix
(
nullptr
,
0
,
input_dim
));
cpu_inputs
.
addArg
(
*
cpu_seq
);
cpu_outputs
.
addArg
(
cpu_out
,
ADD_TO
);
compare
.
getCpuFunction
()
->
calc
(
cpu_inputs
,
cpu_outputs
);
BufferArgs
gpu_inputs
;
BufferArgs
gpu_outputs
;
gpu_inputs
.
addArg
(
gpu_in
);
gpu_inputs
.
addArg
(
gpu_weight
?
*
gpu_weight
:
GpuMatrix
(
nullptr
,
0
,
input_dim
));
gpu_inputs
.
addArg
(
*
gpu_seq
);
gpu_outputs
.
addArg
(
gpu_out
,
ADD_TO
);
compare
.
getGpuFunction
()
->
calc
(
gpu_inputs
,
gpu_outputs
);
autotest
::
TensorCheckEqual
(
cpu_out
,
gpu_out
);
}
...
...
@@ -119,25 +120,25 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad
->
copyFrom
(
*
cpu_w_grad
);
}
compare
.
getCpuFunction
()
->
calc
(
{
Tensor
(
reinterpret_cast
<
real
*>
(
cpu_seq
->
getData
()),
Dims
{
cpu_seq
->
getSize
()}),
Tensor
(
cpu_out_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})},
{},
{
Tensor
(
cpu_in_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
cpu_w_grad
?
cpu_w_grad
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
})}
);
compare
.
getGpuFunction
()
->
calc
(
{
Tensor
(
reinterpret_cast
<
real
*>
(
gpu_seq
->
getData
()),
Dims
{
gpu_seq
->
getSize
()}),
Tensor
(
gpu_out_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
*
context_length
})},
{},
{
Tensor
(
gpu_in_grad
.
getData
(),
Dims
{
batch_size
,
input_dim
}),
Tensor
(
gpu_w_grad
?
gpu_w_grad
->
getData
()
:
nullptr
,
Dims
{
pad
,
input_dim
})}
);
BufferArgs
cpu_inputs
;
BufferArgs
cpu_outputs
;
cpu_inputs
.
addArg
(
*
cpu_seq
);
cpu_inputs
.
addArg
(
cpu_out_grad
);
cpu_outputs
.
addArg
(
cpu_in_grad
,
ADD_TO
);
cpu_outputs
.
addArg
(
cpu_w_grad
?
*
cpu_w_grad
:
CpuMatrix
(
nullptr
,
0
,
input_dim
),
ADD_TO
);
compare
.
getCpuFunction
()
->
calc
(
cpu_inputs
,
cpu_outputs
);
BufferArgs
gpu_inputs
;
BufferArgs
gpu_outputs
;
gpu_inputs
.
addArg
(
*
gpu_seq
);
gpu_inputs
.
addArg
(
gpu_out_grad
);
gpu_outputs
.
addArg
(
gpu_in_grad
,
ADD_TO
);
gpu_outputs
.
addArg
(
gpu_w_grad
?
*
gpu_w_grad
:
GpuMatrix
(
nullptr
,
0
,
input_dim
),
ADD_TO
);
compare
.
getGpuFunction
()
->
calc
(
gpu_inputs
,
gpu_outputs
);
autotest
::
TensorCheckErr
(
cpu_in_grad
,
gpu_in_grad
);
if
(
is_padding
)
{
...
...
paddle/function/FunctionTest.h
浏览文件 @
23ac0b78
...
...
@@ -27,66 +27,28 @@ public:
gpu
->
init
(
config
);
}
void
cmpWithArg
(
const
Argument
s
&
inputs
,
const
Argument
s
&
outputs
,
const
Argument
s
&
inouts
)
{
void
cmpWithArg
(
const
BufferArg
s
&
inputs
,
const
BufferArg
s
&
outputs
,
const
BufferArg
s
&
inouts
)
{
// init cpu and gpu arguments
auto
initArgs
=
[
=
](
Arguments
&
cpuArgs
,
Arguments
&
gpuArgs
,
const
Arguments
&
inArgs
)
{
for
(
const
auto
arg
:
inArgs
)
{
size_t
size
=
sizeof
(
real
);
for
(
const
auto
dim
:
arg
.
dims_
)
{
size
*=
dim
;
}
if
(
arg
.
getData
())
{
// todo(tianbing), waste unnecessary mem here
cpuMemory
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuArgs
.
emplace_back
(
Tensor
((
real
*
)
arg
.
getData
(),
arg
.
dims_
));
gpuArgs
.
emplace_back
(
Tensor
((
real
*
)
arg
.
getData
(),
arg
.
dims_
));
// already init outside
}
else
{
cpuMemory
.
emplace_back
(
std
::
make_shared
<
CpuMemoryHandle
>
(
size
));
gpuMemory
.
emplace_back
(
std
::
make_shared
<
GpuMemoryHandle
>
(
size
));
cpuArgs
.
emplace_back
(
Tensor
((
real
*
)
cpuMemory
.
back
()
->
getBuf
(),
arg
.
dims_
));
gpuArgs
.
emplace_back
(
Tensor
((
real
*
)
gpuMemory
.
back
()
->
getBuf
(),
arg
.
dims_
));
// will use an api to refactor this code.
CpuVector
cpuVector
(
size
/
sizeof
(
real
),
(
real
*
)
cpuArgs
.
back
().
getData
());
GpuVector
gpuVector
(
size
/
sizeof
(
real
),
(
real
*
)
gpuArgs
.
back
().
getData
());
cpuVector
.
uniform
(
0.001
,
1
);
gpuVector
.
copyFrom
(
cpuVector
);
}
}
BufferArgs
&
cpuArgs
,
BufferArgs
&
gpuArgs
,
const
BufferArgs
&
inArgs
)
{
/// leave it empty to pass the compile of ContextProjectionTest
/// Daoyuan is working on FunctionTest
/// and I will further merge with it
};
initArgs
(
cpuInputs
,
gpuInputs
,
inputs
);
initArgs
(
cpuOutputs
,
gpuOutputs
,
outputs
);
initArgs
(
cpuInouts
,
gpuInouts
,
inouts
);
// function calculate
cpu
->
calc
(
cpuInputs
,
cpuOutputs
,
cpuInouts
);
gpu
->
calc
(
gpuInputs
,
gpuOutputs
,
gpuInouts
);
cpu
->
calc
(
cpuInputs
,
cpuOutputs
);
gpu
->
calc
(
gpuInputs
,
gpuOutputs
);
// check outputs and inouts
auto
checkArgs
=
[
=
](
const
Arguments
&
cpuArgs
,
const
Arguments
&
gpuArgs
)
{
for
(
size_t
i
=
0
;
i
<
cpuArgs
.
size
();
i
++
)
{
auto
cpu
=
cpuArgs
[
i
];
auto
gpu
=
gpuArgs
[
i
];
size_t
size
=
1
;
for
(
auto
dim
:
cpu
.
dims_
)
{
size
*=
dim
;
}
CpuVector
cpuVector
(
size
,
(
real
*
)
cpu
.
getData
());
GpuVector
gpuVector
(
size
,
(
real
*
)
gpu
.
getData
());
autotest
::
TensorCheckErr
(
cpuVector
,
gpuVector
);
}
auto
checkArgs
=
[
=
](
const
BufferArgs
&
cpuArgs
,
const
BufferArgs
&
gpuArgs
)
{
/// leave it open
};
checkArgs
(
cpuOutputs
,
gpuOutputs
);
checkArgs
(
cpuInouts
,
gpuInouts
);
}
std
::
shared_ptr
<
FunctionBase
>
getCpuFunction
()
const
{
return
cpu
;
}
...
...
@@ -98,12 +60,12 @@ protected:
std
::
shared_ptr
<
FunctionBase
>
gpu
;
std
::
vector
<
CpuMemHandlePtr
>
cpuMemory
;
std
::
vector
<
GpuMemHandlePtr
>
gpuMemory
;
Argument
s
cpuInputs
;
Argument
s
cpuOutputs
;
Argument
s
cpuInouts
;
Argument
s
gpuInputs
;
Argument
s
gpuOutputs
;
Argument
s
gpuInouts
;
BufferArg
s
cpuInputs
;
BufferArg
s
cpuOutputs
;
BufferArg
s
cpuInouts
;
BufferArg
s
gpuInputs
;
BufferArg
s
gpuOutputs
;
BufferArg
s
gpuInouts
;
};
}
// namespace paddle
paddle/gserver/layers/ContextProjection.cpp
浏览文件 @
23ac0b78
...
...
@@ -166,13 +166,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
BufferArgs
inputs
;
BufferArgs
outputs
;
inputs
.
addArg
(
CpuMatrix
(
in_
->
grad
?
in_
->
grad
->
getData
()
:
nullptr
,
batch_size
,
input_dim
));
inputs
.
addArg
(
CpuMatrix
(
w_ptr
?
w_ptr
->
getData
()
:
nullptr
,
w_ptr
?
w_ptr
->
getHeight
()
:
0
,
input_dim
));
inputs
.
addArg
(
*
in_
->
sequenceStartPositions
->
getVector
(
useGpu_
));
outputs
.
addArg
(
*
out_
->
grad
,
ADD_TO
);
inputs
.
addArg
(
*
out_
->
grad
);
outputs
.
addArg
(
CpuMatrix
(
in_
->
grad
?
in_
->
grad
->
getData
()
:
nullptr
,
batch_size
,
input_dim
),
ADD_TO
);
outputs
.
addArg
(
CpuMatrix
(
w_ptr
?
w_ptr
->
getData
()
:
nullptr
,
w_ptr
?
w_ptr
->
getHeight
()
:
0
,
input_dim
),
ADD_TO
);
backward_
[
0
]
->
calc
(
inputs
,
outputs
);
if
(
config_
.
trainable_padding
())
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录