PaddleDetection (fork of PaddlePaddle / PaddleDetection)

Commit 86fa8c05
Authored Jan 05, 2017 by xutianbing

Wei Xu's comments, set up right inouts.

Parent: 1dd972f9
Showing 4 changed files with 124 additions and 79 deletions (+124 -79):

paddle/function/ContextProjectionOp.cpp       +82  -42
paddle/function/ContextProjectionOp.h         +10  -10
paddle/function/ContextProjectionOpGpu.cu     +14  -11
paddle/function/ContextProjectionOpTest.cpp   +18  -16
paddle/function/ContextProjectionOp.cpp
...
...
@@ -70,10 +70,11 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
}
/**
 * \param outputs[0] output value.
 *
 * \param inputs[0] input value.
 * \param inputs[1] input weight.
 * \param inputs[2] input sequence.
 * \param outputs[0] output value.
 */
template <DeviceType Device>
class ContextProjectionForwardFunc : public FunctionBase {
...
...
@@ -123,7 +124,8 @@ private:
};
template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
<<<<<<< HEAD
void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
                                                CpuMatrix& in_grad_mat,
                                                CpuMatrix& w_grad_mat,
                                                const CpuIVector& seq_vec,
...
...
@@ -176,10 +178,10 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
}
/**
 * \param inputs[0] input grad.
 * \param inputs[1] weight grad.
 * \param inputs[2] input sequence.
 * \param outputs[0] output value.
 * \param inputs[0] input sequence.
 * \param inputs[1] output grad.
 * \param inouts[0] input grad.
 * \param inouts[1] weight grad.
 */
template <DeviceType Device>
class ContextProjectionBackwardFunc : public FunctionBase {
...
...
@@ -192,6 +194,7 @@ public:
    total_pad_ = config.get<size_t>("total_pad");
  }
<<<<<<< HEAD
  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ((size_t)3, inputs.size());
    CHECK_EQ((size_t)1, outputs.size());
...
...
@@ -210,6 +213,42 @@ public:
    CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
=======
  void calc(const Arguments& inputs,
            const Arguments& outputs,
            const Arguments& inouts) override {
    CHECK_EQ(2, inputs.size());
    CHECK_EQ(0, outputs.size());
    CHECK_EQ(2, inouts.size());
    CHECK(inputs[0].getData() && inputs[1].getData());
    CHECK_EQ(inputs[0].dims_.size(), 1);
    CHECK_EQ(inputs[1].dims_.size(), 2);
    CHECK_EQ(inouts[0].dims_.size(), 2);
    CHECK_EQ(inouts[1].dims_.size(), 2);
    /// dim of input grad == dim of weight grad
    CHECK_EQ(inouts[0].dims_[1], inouts[1].dims_[1]);
    /// input grad and output grad have the same batch_size
    CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
    /// dim of output = dim of input * context_length
    CHECK_EQ(inputs[1].dims_[1], inputs[0].dims_[1] * context_length_);
    typename SequenceT<Device>::type seq_vec(
        inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
    const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
    auto in_grad_mat =
        !inouts[0].getData()
            ? nullptr
            : std::make_shared<typename MatrixT<Device>::type>(
                  inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
    auto w_grad_mat =
        !inouts[1].getData()
            ? nullptr
            : std::make_shared<typename MatrixT<Device>::type>(
                  inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]);
>>>>>>> Wei Xu's comments, set up right inouts.
    auto out_grad_mat = outputs[0].matrix<Device>();
    auto in_grad_mat =
...
...
@@ -240,9 +279,9 @@ private:
#if 0
/**
 * \param inputs[0] input grad.
 * \param inputs[1] input sequence.
 * \param outputs[0] output grad.
 * \param inouts[0] input grad.
 * \param inputs[0] input sequence.
 * \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase {
...
...
@@ -255,23 +294,24 @@ public:
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size()));
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
/// input and output grad have the same batch_size
CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardData<Device>(out_grad_mat.get(),
in_grad_mat.get(),
...
...
@@ -286,9 +326,9 @@ private:
};
/**
 * \param inputs[0] weight grad.
 * \param inputs[1] input sequence.
 * \param outputs[0] output grad.
 * \param inouts[0] weight grad.
 * \param inputs[0] input sequence.
 * \param inputs[1] output grad.
*/
template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase {
...
...
@@ -303,22 +343,22 @@ public:
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size()));
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
CHECK_EQ(2, inputs.size());
CHECK_EQ(0, outputs.size());
CHECK_EQ(1, inouts.size());
CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
CHECK_EQ(inputs[0].dims_.size(), 1);
CHECK_EQ(inputs[1].dims_.size(), 2);
CHECK_EQ(inouts[0].dims_.size(), 2);
CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
w_grad_mat.get(),
...
...
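As context for the shape checks above (output width equals input width times context_length, with the sequence argument holding per-sequence row offsets), here is a minimal standalone sketch of the forward projection on plain arrays. It only illustrates the semantics described in the doc comments, not the Paddle implementation; weight and padding handling (begin_pad, is_padding) is omitted, and all names are local to the sketch.

#include <cstddef>
#include <cstdio>
#include <vector>

// Naive context projection over one sequence of `rows` rows of width `input_dim`.
// Output row i concatenates input rows i + context_start + c for c in
// [0, context_length); rows falling outside the sequence stay zero.
std::vector<float> contextProjectForward(const std::vector<float>& in,
                                         int rows,
                                         int input_dim,
                                         int context_length,
                                         int context_start) {
  std::vector<float> out(static_cast<std::size_t>(rows) * input_dim * context_length, 0.f);
  for (int i = 0; i < rows; ++i) {
    for (int c = 0; c < context_length; ++c) {
      int src = i + context_start + c;
      if (src < 0 || src >= rows) continue;  // this is where padding would apply
      for (int d = 0; d < input_dim; ++d) {
        out[(static_cast<std::size_t>(i) * context_length + c) * input_dim + d] =
            in[static_cast<std::size_t>(src) * input_dim + d];
      }
    }
  }
  return out;
}

int main() {
  const int rows = 4, input_dim = 3, context_length = 2, context_start = -1;
  std::vector<float> in(rows * input_dim);
  for (std::size_t k = 0; k < in.size(); ++k) in[k] = static_cast<float>(k);
  auto out = contextProjectForward(in, rows, input_dim, context_length, context_start);
  // Output width is input_dim * context_length, matching the CHECK_EQ above.
  std::printf("out: %d x %d\n", rows, input_dim * context_length);
  return 0;
}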
paddle/function/ContextProjectionOp.h
...
...
@@ -21,14 +21,14 @@ namespace paddle {
/**
 * \brief Context Projection Forward.
 *
 * \param[out] outputs output data.
 * \param[in] input input data.
 * \param[in] weight input weight.
 * \param[in] sequence input data.
 * \param[in] context_length consecutive rows for concatenation.
 * \param[in] context_start context start position.
 * \param[in] begin_pad begining pad position.
 * \param[in] is_padding whether padding 0 or not.
 * \param[in/out] outputs output data.
 * \param[in] input input data.
 * \param[in] weight input weight.
 * \param[in] sequence input data.
 * \param[in] context_length consecutive rows for concatenation.
 * \param[in] context_start context start position.
 * \param[in] begin_pad begining pad position.
 * \param[in] is_padding whether padding 0 or not.
 *
 */
template <DeviceType DType>
...
...
@@ -68,7 +68,7 @@ void ContextProjectionBackward(
template <DeviceType DType>
void ContextProjectionBackwardData(
    typename Tensor<real, DType>::Matrix& out_grad,
    const typename Tensor<real, DType>::Matrix& out_grad,
    typename Tensor<real, DType>::Matrix& in_grad,
    const typename Tensor<int, DType>::Vector& sequence,
    size_t context_length,
...
...
@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
template <DeviceType DType>
void ContextProjectionBackwardWeight(
    typename Tensor<real, DType>::Matrix& out_grad,
    const typename Tensor<real, DType>::Matrix& out_grad,
    typename Tensor<real, DType>::Matrix& w_grad,
    const typename Tensor<int, DType>::Vector& seq_vec,
    size_t context_length,
...
...
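The header above only declares the backward kernels; as a rough mental model, the data-gradient pass scatters each slot of the output gradient back to the input row it was copied from in the forward pass. A standalone sketch, under the same simplifications as the forward sketch earlier and again not Paddle's code:

#include <cstddef>
#include <vector>

// Standalone sketch mirroring the naive forward above: output slot (i, c) read
// input row i + context_start + c, so its gradient is accumulated back there.
void contextProjectBackwardData(const std::vector<float>& out_grad,
                                std::vector<float>& in_grad,
                                int rows,
                                int input_dim,
                                int context_length,
                                int context_start) {
  for (int i = 0; i < rows; ++i) {
    for (int c = 0; c < context_length; ++c) {
      int src = i + context_start + c;
      if (src < 0 || src >= rows) continue;  // padded slots carry no data gradient here
      for (int d = 0; d < input_dim; ++d) {
        in_grad[static_cast<std::size_t>(src) * input_dim + d] +=
            out_grad[(static_cast<std::size_t>(i) * context_length + c) * input_dim + d];
      }
    }
  }
}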
paddle/function/ContextProjectionOpGpu.cu
...
...
@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
                                         begin_pad);
}
__global__ void KeContextProjectionBackwardData(real* out_grad,
__global__ void KeContextProjectionBackwardData(const real* out_grad,
                                                const int* sequence,
                                                real* in_grad,
                                                int input_dim,
                                                size_t input_dim,
                                                int context_length,
                                                int context_start) {
  int idx = threadIdx.x;
...
...
@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
  real value = 0;
  int instances = seq_end - seq_start + context_length - 1;
  out_grad += seq_start * input_dim * context_length;
  auto out = const_cast<real*>(out_grad);
  out += seq_start * input_dim * context_length;
  in_grad += seq_start * input_dim;
  for (int k = 0; k <= input_dim / block_size; k++) {
    if (idx < input_dim) {
...
...
@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
      int outx = (i - context_length) < 0 ? i : (context_length - 1);
      int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
      real* output_r = out_grad + outy * input_dim * context_length + outx * input_dim;
      real* output_r = out + outy * input_dim * context_length + outx * input_dim;
      for (int j = outy; j < seq_end - seq_start; j++) {
        value += output_r[idx];
        if (j - outy == outx) break;
...
...
@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
* @param[in] context_start context start.
*
*/
void hl_context_projection_backward_data(real* out_grad,
void hl_context_projection_backward_data(const real* out_grad,
                                          const int* sequence,
                                          real* input_grad,
                                          size_t num_sequences,
...
...
@@ -216,7 +217,8 @@ void hl_context_projection_backward_data(real* out_grad,
}
template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
<<<<<<< HEAD
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                     GpuMatrix& in_grad,
                                                     const GpuIVector& sequence,
                                                     size_t context_length,
...
...
@@ -231,7 +233,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
}
template <int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad,
__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
                                                  const int* sequence,
                                                  real* w_grad,
                                                  int num_sequences,
...
...
@@ -254,7 +256,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
  for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
    int seq_start = sequence[seqId];
    int seq_end = sequence[seqId + 1];
    output_r = out_grad + seq_start * w_dim * context_length;
    output_r = const_cast<real*>(out_grad) + seq_start * w_dim * context_length;
    if (context_start < 0) {
      if (padId + context_start < 0) {
...
...
@@ -318,7 +321,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
* beginning.
*
*/
void hl_context_projection_backward_weight(real* out_grad,
void hl_context_projection_backward_weight(const real* out_grad,
                                            const int* sequence,
                                            real* w_grad,
                                            size_t num_sequences,
...
...
@@ -346,7 +349,7 @@ void hl_context_projection_backward_weight(real* out_grad,
template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
    const GpuMatrix& out_grad,
    GpuMatrix& w_grad,
    const GpuIVector& seq_vec,
    size_t context_length,
...
...
@@ -365,7 +368,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
}
template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                 GpuMatrix& in_grad,
                                                 GpuMatrix& w_grad,
                                                 const GpuIVector& sequence,
...
...
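One detail worth noting in the CUDA hunks: out_grad is now taken as const real*, and where a kernel keeps its existing pointer-walking code it drops the qualifier locally with const_cast (the buffer is still only read). A minimal standalone illustration of that idiom, with hypothetical names:

#include <cstddef>

typedef float real;

// Hypothetical helper: sum one sequence's slice of a read-only gradient buffer.
// const_cast only changes the pointer's type so the original arithmetic and
// indexing code compiles unchanged; nothing is written through `out`.
real sumSequenceSlice(const real* out_grad, std::size_t seq_start, std::size_t width) {
  real* out = const_cast<real*>(out_grad);  // same pattern as in the kernels above
  out += seq_start * width;
  real value = 0;
  for (std::size_t i = 0; i < width; ++i) value += out[i];
  return value;
}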
paddle/function/ContextProjectionOpTest.cpp
...
...
@@ -62,16 +62,18 @@ void testMatrixProjectionForward(int context_start,
              Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(cpu_seq->getData()), Dims{cpu_seq->getSize()})},
      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
      {});
      {},
      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})});
  compare.getGpuFunction()->calc(
      {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
       Tensor(gpu_weight ? gpu_weight->getData() : nullptr, Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(gpu_seq->getData()), Dims{gpu_seq->getSize()})},
      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
      {});
      {},
      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})});
  autotest::TensorCheckEqual(cpu_out, gpu_out);
}
...
...
@@ -118,24 +120,24 @@ void testMatrixProjectionBackward(int context_start,
}
  compare.getCpuFunction()->calc(
      {Tensor(reinterpret_cast<real*>(cpu_seq->getData()), Dims{cpu_seq->getSize()}),
       Tensor(cpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
      {},
      {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(cpu_seq->getData()), Dims{cpu_seq->getSize()})},
      {Tensor(cpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
      {});
       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, Dims{pad, input_dim})});
  compare.getGpuFunction()->calc(
      {Tensor(reinterpret_cast<real*>(gpu_seq->getData()), Dims{gpu_seq->getSize()}),
       Tensor(gpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
      {},
      {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, Dims{pad, input_dim}),
       Tensor(reinterpret_cast<real*>(gpu_seq->getData()), Dims{gpu_seq->getSize()})},
      {Tensor(gpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
      {});
       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, Dims{pad, input_dim})});
  autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
  if (is_padding) {
...
...