Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
6720e67f
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6720e67f
编写于
1月 19, 2018
作者:
C
Cao Ying
提交者:
GitHub
1月 19, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7675 from lcy-seso/remove_copy_from_crf
delete memory copy from linear_chain_crf_op.
上级
a1c281f0
4020451a
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
20 addition
and
210 deletion
+20
-210
paddle/operators/linear_chain_crf_op.cc
paddle/operators/linear_chain_crf_op.cc
+2
-2
paddle/operators/linear_chain_crf_op.h
paddle/operators/linear_chain_crf_op.h
+18
-208
未找到文件。
paddle/operators/linear_chain_crf_op.cc
浏览文件 @
6720e67f
...
...
@@ -187,7 +187,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
LoDTensor
>
(
"Emission"
)
->
type
()),
ctx
.
device_context
());
platform
::
CPUPlace
());
}
};
...
...
@@ -248,7 +248,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
framework
::
ToDataType
(
ctx
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"LogLikelihood"
))
->
type
()),
ctx
.
device_context
());
platform
::
CPUPlace
());
}
};
...
...
paddle/operators/linear_chain_crf_op.h
浏览文件 @
6720e67f
...
...
@@ -65,57 +65,14 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
const
size_t
level
=
0
;
const
size_t
seq_num
=
in_lod
[
level
].
size
()
-
1
;
// These local variables hold the inputs and outputs, guaranteeing that they
// reside in CPU memory, to provide a consistent reference.
// TODO(caoying) Fix this by moving all these local variables into the
// class's data members once we can profile the whole training process.
LoDTensor
*
emission_weights
=
nullptr
;
LoDTensor
emission_weight_tensor
;
Tensor
*
transition_weights
=
nullptr
;
Tensor
transition_weight_tensor
;
LoDTensor
*
label
=
nullptr
;
LoDTensor
label_tensor
;
Tensor
*
emission_exps
=
nullptr
;
Tensor
emission_exps_tensor
;
Tensor
*
transition_exps
=
nullptr
;
Tensor
transition_exps_tensor
;
Tensor
*
alpha
=
nullptr
;
Tensor
alpha_tensor
;
Tensor
*
ll
=
nullptr
;
Tensor
ll_tensor
;
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
emission_weights
=
&
emission_weight_tensor
;
transition_weights
=
&
transition_weight_tensor
;
label
=
&
label_tensor
;
CopyInputsToCpuMemory
(
ctx
.
device_context
(),
*
ctx
.
Input
<
LoDTensor
>
(
"Emission"
),
*
ctx
.
Input
<
Tensor
>
(
"Transition"
),
*
ctx
.
Input
<
LoDTensor
>
(
"Label"
),
emission_weights
,
transition_weights
,
label
);
emission_exps
=
&
emission_exps_tensor
;
emission_exps
->
Resize
(
emission_weights
->
dims
());
transition_exps
=
&
transition_exps_tensor
;
transition_exps
->
Resize
(
transition_weights
->
dims
());
alpha
=
&
alpha_tensor
;
alpha
->
Resize
(
ctx
.
Output
<
Tensor
>
(
"Alpha"
)
->
dims
());
ll
=
&
ll_tensor
;
}
else
{
emission_weights
=
const_cast
<
LoDTensor
*>
(
ctx
.
Input
<
LoDTensor
>
(
"Emission"
));
transition_weights
=
const_cast
<
Tensor
*>
(
ctx
.
Input
<
Tensor
>
(
"Transition"
));
label
=
const_cast
<
LoDTensor
*>
(
ctx
.
Input
<
LoDTensor
>
(
"Label"
));
emission_exps
=
ctx
.
Output
<
Tensor
>
(
"EmissionExps"
);
transition_exps
=
ctx
.
Output
<
Tensor
>
(
"TransitionExps"
);
alpha
=
ctx
.
Output
<
Tensor
>
(
"Alpha"
);
ll
=
ctx
.
Output
<
Tensor
>
(
"LogLikelihood"
);
}
const
LoDTensor
*
emission_weights
=
ctx
.
Input
<
LoDTensor
>
(
"Emission"
);
const
Tensor
*
transition_weights
=
ctx
.
Input
<
Tensor
>
(
"Transition"
);
const
LoDTensor
*
label
=
ctx
.
Input
<
LoDTensor
>
(
"Label"
);
Tensor
*
emission_exps
=
ctx
.
Output
<
Tensor
>
(
"EmissionExps"
);
Tensor
*
transition_exps
=
ctx
.
Output
<
Tensor
>
(
"TransitionExps"
);
Tensor
*
alpha
=
ctx
.
Output
<
Tensor
>
(
"Alpha"
);
Tensor
*
ll
=
ctx
.
Output
<
Tensor
>
(
"LogLikelihood"
);
// Because the computation code runs only on CPU, the memory for all
// the outputs is always allocated in CPU memory.
...
...
@@ -173,61 +130,9 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
one_seq
,
one_seq_row_max
,
one_seq_exps
,
*
transition_weights
,
*
transition_exps
,
one_seq_label
,
&
one_seq_alpha
);
}
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
CopyOutputsToGpuMemory
(
ctx
.
device_context
(),
*
emission_exps
,
*
transition_exps
,
*
alpha
,
*
ll
,
ctx
.
Output
<
Tensor
>
(
"EmissionExps"
),
ctx
.
Output
<
Tensor
>
(
"TransitionExps"
),
ctx
.
Output
<
Tensor
>
(
"Alpha"
),
ctx
.
Output
<
Tensor
>
(
"LogLikelihood"
));
}
};
private:
// Stages the forward kernel's inputs in CPU memory.
//
// For every (src, dst) pair the destination is first allocated on
// platform::CPUPlace with the source's dims and element type T, and the
// source is then transferred into it with framework::Copy using `ctx`.
// The original author's intent is that this is used when the operator runs
// on a GPU device.
//
// ctx:                     device context handed to framework::Copy.
// emission_weights_src:    source tensor for the emission weights.
// transition_weights_src:  source tensor for the transition weights.
// label_src:               source tensor for the labels.
// *_dst:                   destination tensors, (re)allocated on CPUPlace.
//
// NOTE(review): the label destination is allocated with element type T, the
// same as the weights -- confirm this matches the label's real element type.
void CopyInputsToCpuMemory(const platform::DeviceContext& ctx,
                           const LoDTensor& emission_weights_src,
                           const Tensor& transition_weights_src,
                           const LoDTensor& label_src,
                           LoDTensor* emission_weights_dst,
                           Tensor* transition_weights_dst,
                           LoDTensor* label_dst) const {
  // Allocate `to` on the CPU with the shape of `from`, then copy the data.
  const auto stage_lod_tensor = [&ctx](const LoDTensor& from, LoDTensor* to) {
    to->mutable_data<T>(from.dims(), platform::CPUPlace());
    framework::Copy(from, platform::CPUPlace(), ctx, to);
  };

  stage_lod_tensor(emission_weights_src, emission_weights_dst);
  stage_lod_tensor(label_src, label_dst);

  // The transition weights are a plain Tensor, so they are handled inline
  // rather than through the LoDTensor helper above.
  transition_weights_dst->mutable_data<T>(transition_weights_src.dims(),
                                          platform::CPUPlace());
  framework::Copy(transition_weights_src, platform::CPUPlace(), ctx,
                  transition_weights_dst);
}
// Transfers the forward results to GPU memory.
//
// Each destination is allocated on platform::CUDAPlace() with element type T
// and then filled from its source via framework::Copy using `ctx`. The
// original author's intent is that this is used when the operator runs on a
// GPU device.
//
// ctx:     device context handed to framework::Copy.
// *_src:   source tensors holding the computed results.
// *_dst:   destination tensors, allocated on CUDAPlace().
//
// NOTE(review): CUDAPlace() is default-constructed here -- presumably that
// selects a default device; confirm it matches the device the op runs on.
void CopyOutputsToGpuMemory(const platform::DeviceContext& ctx,
                            const Tensor& emission_exps_src,
                            const Tensor& transition_exps_src,
                            const Tensor& alpha_src, const Tensor& ll_src,
                            Tensor* emission_exps_dst,
                            Tensor* transition_exps_dst, Tensor* alpha_dst,
                            Tensor* ll_dst) const {
  // Allocate `to` on the GPU place, then copy `from` into it.
  const auto upload = [&ctx](const Tensor& from, Tensor* to) {
    to->mutable_data<T>(platform::CUDAPlace());
    framework::Copy(from, platform::CUDAPlace(), ctx, to);
  };

  upload(emission_exps_src, emission_exps_dst);
  upload(transition_exps_src, transition_exps_dst);
  upload(alpha_src, alpha_dst);
  upload(ll_src, ll_dst);
}
T
ForwardOneSequence
(
const
Tensor
&
emission
,
const
Tensor
&
emission_row_max
,
const
Tensor
&
emission_exps
,
const
Tensor
&
trans_weights
,
const
Tensor
&
trans_weight_exps
,
const
Tensor
&
label
,
...
...
@@ -296,63 +201,17 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
auto
lod
=
ctx
.
Input
<
LoDTensor
>
(
"Label"
)
->
lod
();
PADDLE_ENFORCE
(
lod
.
size
(),
"Input(Label) must be a sequence."
);
// These local variables hold the inputs and outputs, guaranteeing that they
// reside in CPU memory, to provide a consistent reference.
// TODO(caoying) Fix this by moving all these local variables into the
// class's data members once we can profile the training process, or
// implementing a real GPU kernel for CRF.
Tensor
*
label
=
nullptr
;
Tensor
label_tensor
;
Tensor
*
emission_exps
=
nullptr
;
Tensor
emission_exps_tensor
;
Tensor
*
transition_exps
=
nullptr
;
Tensor
transition_exps_tensor
;
Tensor
*
alpha
=
nullptr
;
Tensor
alpha_tensor
;
Tensor
ll_grad_tensor
;
T
*
ll_grad
=
nullptr
;
Tensor
*
emission_grad
=
nullptr
;
Tensor
emission_grad_tensor
;
Tensor
*
transition_grad
=
nullptr
;
Tensor
transition_grad_tensor
;
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
label
=
&
label_tensor
;
emission_exps
=
&
emission_exps_tensor
;
transition_exps
=
&
transition_exps_tensor
;
alpha
=
&
alpha_tensor
;
CopyInputsToCpuMemory
(
ctx
.
device_context
(),
*
ctx
.
Input
<
LoDTensor
>
(
"Label"
),
*
ctx
.
Input
<
Tensor
>
(
"EmissionExps"
),
*
ctx
.
Input
<
Tensor
>
(
"TransitionExps"
),
*
ctx
.
Input
<
Tensor
>
(
"Alpha"
),
*
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"LogLikelihood"
)),
label
,
emission_exps
,
transition_exps
,
alpha
,
&
ll_grad_tensor
);
ll_grad
=
ll_grad_tensor
.
data
<
T
>
();
if
(
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Emission"
)))
{
emission_grad
=
&
emission_grad_tensor
;
emission_grad
->
Resize
(
emission_exps
->
dims
());
}
const
Tensor
*
label
=
ctx
.
Input
<
LoDTensor
>
(
"Label"
);
const
Tensor
*
emission_exps
=
ctx
.
Input
<
Tensor
>
(
"EmissionExps"
);
const
Tensor
*
transition_exps
=
ctx
.
Input
<
Tensor
>
(
"TransitionExps"
);
const
Tensor
*
alpha
=
ctx
.
Input
<
Tensor
>
(
"Alpha"
);
const
T
*
ll_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"LogLikelihood"
))
->
data
<
T
>
();
if
(
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Transition"
)))
{
transition_grad
=
&
transition_grad_tensor
;
transition_grad
->
Resize
(
transition_exps
->
dims
());
}
}
else
{
label
=
const_cast
<
LoDTensor
*>
(
ctx
.
Input
<
LoDTensor
>
(
"Label"
));
emission_exps
=
const_cast
<
Tensor
*>
(
ctx
.
Input
<
Tensor
>
(
"EmissionExps"
));
transition_exps
=
const_cast
<
Tensor
*>
(
ctx
.
Input
<
Tensor
>
(
"TransitionExps"
));
alpha
=
const_cast
<
Tensor
*>
(
ctx
.
Input
<
Tensor
>
(
"Alpha"
));
ll_grad
=
const_cast
<
Tensor
*>
(
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"LogLikelihood"
)))
->
data
<
T
>
();
emission_grad
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Emission"
));
transition_grad
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Transition"
));
}
Tensor
*
emission_grad
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Emission"
));
Tensor
*
transition_grad
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Transition"
));
// TODO(caoying) Fix this constraint. When the Input(Emission) is from the
// data reader operator, it can have no gradients.
...
...
@@ -389,58 +248,9 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
one_seq_emission_exps
,
*
transition_exps
,
one_seq_alpha
,
one_seq_label
,
&
one_seq_beta
,
transition_grad
,
&
one_seq_emission_grad
);
}
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
CopyOutputsToGpuMemory
(
ctx
.
device_context
(),
emission_grad
,
transition_grad
,
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Emission"
)),
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Transition"
)));
}
};
private:
// Stages the backward kernel's inputs in CPU memory.
//
// Every destination is allocated on platform::CPUPlace with the dims of its
// source and element type T, then filled through framework::Copy using
// `ctx`. The original author's intent is that this is used when the operator
// runs on a GPU device.
//
// ctx:                  device context handed to framework::Copy.
// label_src:            source tensor for the labels.
// emission_exps_src:    source tensor for the emission exponentials.
// transition_exps_src:  source tensor for the transition exponentials.
// alpha_src:            source tensor for alpha.
// ll_grad_src:          source tensor for the log-likelihood gradient.
// *_dst:                destination tensors, (re)allocated on CPUPlace.
//
// NOTE(review): the label destination is allocated with element type T, like
// every other tensor here -- confirm this matches the label's element type.
void CopyInputsToCpuMemory(const platform::DeviceContext& ctx,
                           const LoDTensor& label_src,
                           const Tensor& emission_exps_src,
                           const Tensor& transition_exps_src,
                           const Tensor& alpha_src, const Tensor& ll_grad_src,
                           Tensor* label_dst, Tensor* emission_exps_dst,
                           Tensor* transition_exps_dst, Tensor* alpha_dst,
                           Tensor* ll_grad_dst) const {
  // Allocate `to` on the CPU with the shape of `from`, then copy the data.
  const auto stage_tensor = [&ctx](const Tensor& from, Tensor* to) {
    to->mutable_data<T>(from.dims(), platform::CPUPlace());
    framework::Copy(from, platform::CPUPlace(), ctx, to);
  };

  // The label source is a LoDTensor, so it is copied inline and first,
  // exactly as in the original statement order.
  label_dst->mutable_data<T>(label_src.dims(), platform::CPUPlace());
  framework::Copy(label_src, platform::CPUPlace(), ctx, label_dst);

  stage_tensor(emission_exps_src, emission_exps_dst);
  stage_tensor(transition_exps_src, transition_exps_dst);
  stage_tensor(alpha_src, alpha_dst);
  stage_tensor(ll_grad_src, ll_grad_dst);
}
// Transfers the backward results to GPU memory.
//
// A (src, dst) pair is processed only when both pointers are non-null; the
// destination is then allocated on platform::CUDAPlace() with element type T
// and filled from the source via framework::Copy using `ctx`. Pairs with a
// null pointer on either side are skipped silently. The original author's
// intent is that this is used when the operator runs on a GPU device.
//
// ctx:                   device context handed to framework::Copy.
// emission_grad_src:     source for the emission gradient (may be null).
// transition_grad_src:   source for the transition gradient (may be null).
// emission_grad_dst:     destination for the emission gradient (may be null).
// transition_grad_dst:   destination for the transition gradient (may be
//                        null).
//
// NOTE(review): CUDAPlace() is default-constructed here -- presumably that
// selects a default device; confirm it matches the device the op runs on.
void CopyOutputsToGpuMemory(const platform::DeviceContext& ctx,
                            const Tensor* emission_grad_src,
                            const Tensor* transition_grad_src,
                            Tensor* emission_grad_dst,
                            Tensor* transition_grad_dst) const {
  const auto upload_if_present = [&ctx](const Tensor* from, Tensor* to) {
    // Nothing to do unless both ends of the copy exist.
    if (from == nullptr || to == nullptr) {
      return;
    }
    to->mutable_data<T>(platform::CUDAPlace());
    framework::Copy(*from, platform::CUDAPlace(), ctx, to);
  };

  upload_if_present(emission_grad_src, emission_grad_dst);
  upload_if_present(transition_grad_src, transition_grad_dst);
}
void
BackwardOneSequence
(
const
platform
::
CPUDeviceContext
&
ctx
,
const
T
ll_grad
,
const
Tensor
&
emission_exps
,
const
Tensor
&
transition_exps
,
const
Tensor
&
alpha
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录